Advanced use cases

Advanced Python scripting using the Marple SDK

Have a more complex analysis in mind? We've put together a collection of useful code snippets to help you get started.

Heatmap
#! /usr/bin/env python3
# /// script
# dependencies = [
#    "pandas",
#    "matplotlib",
#    "marpledata",
#    "tqdm",
# ]
# ///

"""Plot a 2-D heatmap of two signals across the datasets of one stream.

Adjust the parameters below before running the script.
"""

# --- Connection and data source ---------------------------------------------
MDB_TOKEN = "<MDB_TOKEN>"  # placeholder: replace with a real API token
MDB_HOST = "db.marpledata.com"  # host only; the API URL is built from it
STREAM = "Car data"  # name of the stream to read

# --- Signals to plot: (signal name, unit shown in the axis label) -----------
X_NAME, X_UNIT = "engine_speed", "rpm"
Y_NAME, Y_UNIT = "engine_torque", "Nm"

# --- Optional figure export (set SAVE_PATH to None to disable saving) -------
SAVE_PATH = "heatmap-4k.png"
SAVE_WIDTH_PX, SAVE_HEIGHT_PX = 3840, 2160  # exported image size in pixels
SAVE_DPI = 300


from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
from marple import DB
from marple.db import DataStream
from matplotlib.colors import LogNorm
from tqdm.auto import tqdm


def heatmap_via_python_sdk(
    stream: DataStream,
    x: str,
    y: str,
    x_unit: str = None,
    y_unit: str = None,
    *,
    cache_folder=None,
) -> None:
    """Download two signals from every dataset of a stream and plot a 2-D heatmap.

    The signals are requested with ``resample_rule="1s"``, concatenated over
    all datasets, stripped of rows where either signal is missing, and binned
    into a 200x200 log-scaled 2-D histogram. When the module-level
    ``SAVE_PATH`` is set, the figure is also written to that path at
    ``SAVE_WIDTH_PX`` x ``SAVE_HEIGHT_PX`` pixels.

    Args:
        stream: Stream whose datasets are read.
        x: Name of the signal plotted on the horizontal axis.
        y: Name of the signal plotted on the vertical axis.
        x_unit: Optional unit appended to the x-axis label.
        y_unit: Optional unit appended to the y-axis label.
        cache_folder: Optional folder scanned for ``*.parquet`` files to
            report the download cache size. When omitted, the folder is taken
            from a module-level ``db`` object (``db.client.cache_folder``) if
            one exists; otherwise the size is reported as unknown.

    Returns:
        None. Progress is printed and the figure is shown (and optionally
        saved). Returns early when no data is available.
    """
    datasets = stream.get_datasets()
    n_datasets = len(datasets)
    print(f"[1/4] Stream: {stream.name}")
    print(f"      Signals: {x}, {y}")
    print(f"      Datasets: {n_datasets}")

    print("[2/4] Downloading and resampling data (1s)...")
    # get_data yields pairs; only the second element (the frame) is used here.
    dataset_frames = [
        dataset_df
        for _, dataset_df in tqdm(
            datasets.get_data([x, y], dtype="numeric", resample_rule="1s"),
            total=n_datasets,
            desc="Datasets",
        )
    ]

    if not dataset_frames:
        print("No data returned for the selected stream/signals.")
        return

    if cache_folder is None:
        # Previously this read the global `db` unconditionally, which raised
        # NameError whenever the function was called from an importing module.
        # Keep the script behaviour, but look the global up defensively.
        module_db = globals().get("db")
        cache_folder = getattr(getattr(module_db, "client", None), "cache_folder", None)

    if cache_folder is not None:
        cache_files = list(Path(cache_folder).glob("**/*.parquet"))
        cache_size_mb = sum(file.stat().st_size for file in cache_files) / 1024 / 1024
        print(f"[3/4] Cache size: {cache_size_mb:.2f} MB ({len(cache_files)} files)")
    else:
        print("[3/4] Cache size: unknown (no cache folder available)")

    df = pd.concat(dataset_frames, ignore_index=True).dropna(subset=[x, y])
    if df.empty:
        print("Data downloaded, but no valid rows remain after dropping missing values.")
        return

    print(f"[4/4] Plotting heatmap from {len(df):,} samples...")
    fig, ax = plt.subplots()
    if SAVE_PATH:
        # Resize before tight_layout() so margins are computed for the
        # exported size rather than for matplotlib's default figure size.
        fig.set_size_inches(SAVE_WIDTH_PX / SAVE_DPI, SAVE_HEIGHT_PX / SAVE_DPI)

    cmap = plt.get_cmap("hot").copy()
    # Bins below cmin are masked by hist2d; draw them black instead of blank.
    cmap.set_bad("black")
    h = ax.hist2d(df[x], df[y], bins=200, cmap=cmap, cmin=1, norm=LogNorm())
    # h[3] is the image artist returned by hist2d. Samples are requested at a
    # 1 s resample rule, so the per-bin count is labelled as time in seconds.
    fig.colorbar(h[3], ax=ax, label="Time [s]")
    ax.set_xlabel(f"{x} [{x_unit}]" if x_unit else x)
    ax.set_ylabel(f"{y} [{y_unit}]" if y_unit else y)
    ax.set_title(f"{x} vs {y} Heatmap (Stream: {stream.name}, {len(dataset_frames)} datasets)")
    fig.tight_layout()

    if SAVE_PATH:
        fig.savefig(SAVE_PATH, dpi=SAVE_DPI)
        print(f"Saved {SAVE_PATH} ({SAVE_WIDTH_PX}x{SAVE_HEIGHT_PX}px @ {SAVE_DPI} dpi)")
    plt.show()


if __name__ == "__main__":
    # Script entry point: connect, resolve the configured stream, and plot.
    # `db` is deliberately kept as a module-level name because the heatmap
    # function looks it up to locate the download cache folder.
    api_url = f"https://{MDB_HOST}/api/v1"
    db = DB(api_url=api_url, api_token=MDB_TOKEN)
    heatmap_via_python_sdk(db.get_stream(STREAM), X_NAME, Y_NAME, X_UNIT, Y_UNIT)

Last updated