Skip to content

Commit 2c62b0b

Browse files
committed
Add more merger metrics (batches per level and in-progress merges).
Signed-off-by: Gerd Zellweger <mail@gerdzellweger.com>
1 parent 9966165 commit 2c62b0b

File tree

4 files changed

+171
-8
lines changed

4 files changed

+171
-8
lines changed

crates/dbsp/src/circuit/metrics.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ pub const COMPACTION_SIZE_SAVINGS: &str = "file.compaction_size";
5757
/// Compaction duration for a single batch.
5858
pub const COMPACTION_DURATION: &str = "file.compaction_duration";
5959

60-
/// Time a worker was stalled waiting for more merges to complete.
60+
/// Time in nanoseconds a worker was stalled waiting for more merges to complete.
6161
pub const COMPACTION_STALL_TIME: &str = "file.compaction_stall_time";
6262

6363
/// Number of records dropped due to LATENESS annotations
@@ -66,6 +66,12 @@ pub const TOTAL_LATE_RECORDS: &str = "records.late";
6666
/// Runtime in microseconds of an Operator evaluation
6767
pub const OPERATOR_EVAL_DURATION: &str = "operator.runtime_micros";
6868

69+
/// Number of batches in the spines at each level.
70+
pub const BATCHES_PER_LEVEL: &str = "spine.batches_per_level";
71+
72+
/// Number of pending merges in spines at each level.
73+
pub const ONGOING_MERGES_PER_LEVEL: &str = "spine.ongoing_merges";
74+
6975
/// Creates the appropriate metric name for this metric.
7076
/// As these metrics are DBSP related, they are prefixed with `dbsp_`.
7177
fn metric_name(name: &str) -> String {

crates/dbsp/src/circuit/runtime.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -502,6 +502,26 @@ impl Runtime {
502502
WORKER_INDEX.get()
503503
}
504504

505+
/// Returns the worker index as a string.
506+
///
507+
/// This is useful for metric labels.
508+
pub fn worker_index_str() -> &'static str {
509+
static WORKER_INDEX_STRS: Lazy<[&'static str; 256]> = Lazy::new(|| {
510+
let mut data: [&'static str; 256] = [""; 256];
511+
for (i, item) in data.iter_mut().enumerate() {
512+
*item = Box::leak(i.to_string().into_boxed_str());
513+
}
514+
data
515+
});
516+
517+
WORKER_INDEX_STRS
518+
.get(WORKER_INDEX.get())
519+
.copied()
520+
.unwrap_or_else(|| {
521+
panic!("Limit workers to less than 256 or increase the limit in the code.")
522+
})
523+
}
524+
505525
/// Returns the minimum number of bytes in a batch to spill it to storage,
506526
/// or `None` if this thread doesn't have a [Runtime] or if it doesn't have
507527
/// storage configured.

crates/dbsp/src/trace/spine_async/mod.rs

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,14 @@ use crate::{
1515
cursor::CursorList, merge_batches, Batch, BatchReader, BatchReaderFactories, Cursor,
1616
Filter, Trace,
1717
},
18-
Error, NumEntries,
18+
Error, NumEntries, Runtime,
1919
};
2020

2121
use crate::storage::file::to_bytes;
2222
use crate::storage::write_commit_metadata;
2323
pub use crate::trace::spine_async::snapshot::SpineSnapshot;
2424
use crate::trace::CommittedSpine;
25-
use metrics::counter;
25+
use metrics::{counter, gauge};
2626
use ouroboros::self_referencing;
2727
use rand::Rng;
2828
use rkyv::{
@@ -44,19 +44,23 @@ use std::{
4444
sync::Condvar,
4545
};
4646
use textwrap::indent;
47+
use uuid::Uuid;
4748

4849
mod list_merger;
4950
mod snapshot;
5051
mod thread;
5152

5253
use self::thread::{BackgroundThread, WorkerStatus};
5354
use super::BatchLocation;
54-
use crate::circuit::metrics::COMPACTION_STALL_TIME;
55+
use crate::circuit::metrics::{BATCHES_PER_LEVEL, COMPACTION_STALL_TIME, ONGOING_MERGES_PER_LEVEL};
5556
use list_merger::{ListMerger, ListMergerBuilder};
5657

5758
/// Maximum amount of levels in the spine.
5859
pub(crate) const MAX_LEVELS: usize = 9;
5960

61+
/// Levels as &'static str for metrics
62+
pub(crate) const LEVELS_AS_STR: [&str; MAX_LEVELS] = ["0", "1", "2", "3", "4", "5", "6", "7", "8"];
63+
6064
impl<B: Batch + Send + Sync> From<(Vec<String>, &Spine<B>)> for CommittedSpine<B> {
6165
fn from((batches, spine): (Vec<String>, &Spine<B>)) -> Self {
6266
CommittedSpine {
@@ -162,6 +166,9 @@ where
162166
request_exit: bool,
163167
#[size_of(skip)]
164168
merge_stats: MergeStats,
169+
/// Unique identifier for the spine for metrics.
170+
#[size_of(skip)]
171+
ident: &'static str,
165172
}
166173

167174
impl<B> SharedState<B>
@@ -176,6 +183,7 @@ where
176183
slots: std::array::from_fn(|_| Slot::default()),
177184
request_exit: false,
178185
merge_stats: MergeStats::default(),
186+
ident: String::leak(Uuid::now_v7().to_string()),
179187
}
180188
}
181189

@@ -184,9 +192,7 @@ where
184192
fn add_batches(&mut self, batches: impl IntoIterator<Item = Arc<B>>) {
185193
for batch in batches {
186194
if !batch.is_empty() {
187-
self.slots[Spine::<B>::size_to_level(batch.len())]
188-
.loose_batches
189-
.push_back(batch);
195+
self.add_batch(batch);
190196
}
191197
}
192198
}
@@ -197,6 +203,9 @@ where
197203
debug_assert!(!batch.is_empty());
198204
let level = Spine::<B>::size_to_level(batch.len());
199205
self.slots[level].loose_batches.push_back(batch);
206+
207+
gauge!(BATCHES_PER_LEVEL, "worker" => Runtime::worker_index_str(), "level" => LEVELS_AS_STR[level], "id" => self.ident)
208+
.set(self.slots[level].n_batches() as f64);
200209
}
201210

202211
fn should_apply_backpressure(&self) -> bool {
@@ -256,6 +265,8 @@ where
256265
let cache_stats = batches.iter().fold(CacheStats::default(), |stats, batch| {
257266
stats + batch.cache_stats()
258267
});
268+
gauge!(ONGOING_MERGES_PER_LEVEL, "worker" => Runtime::worker_index_str(), "level" => LEVELS_AS_STR[level], "id" => self.ident)
269+
.set(0);
259270
self.merge_stats.report_merge(
260271
batches.iter().map(|b| b.len()).sum(),
261272
new_batch.len(),
@@ -396,7 +407,7 @@ where
396407
let start = Instant::now();
397408
let mut state = self.no_backpressure.wait(state).unwrap();
398409
state.merge_stats.backpressure_wait += start.elapsed();
399-
counter!(COMPACTION_STALL_TIME).increment(start.elapsed().as_secs());
410+
counter!(COMPACTION_STALL_TIME).increment(start.elapsed().as_nanos() as u64);
400411
}
401412
}
402413

@@ -514,6 +525,7 @@ where
514525
idle: &Arc<Condvar>,
515526
no_backpressure: &Arc<Condvar>,
516527
) -> WorkerStatus {
528+
let ident = state.lock().unwrap().ident;
517529
// Run in-progress merges.
518530
let ((key_filter, value_filter), frontier) = {
519531
let shared = state.lock().unwrap();
@@ -546,6 +558,9 @@ where
546558
.filter_map(|(level, slot)| slot.try_start_merge(level).map(|batches| (level, batches)))
547559
.collect::<Vec<_>>();
548560
for (level, batches) in start_merges {
561+
gauge!(ONGOING_MERGES_PER_LEVEL, "worker" => Runtime::worker_index_str(), "level" => LEVELS_AS_STR[level], "id" => ident)
562+
.set(batches.len() as f64);
563+
549564
let merger = ListMergerBuilder::with_capacity(batches.len())
550565
.with_batches(batches)
551566
.build();

scripts/plot_metrics.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
import pandas as pd
2+
import json
3+
from plotnine import *
4+
5+
6+
def get_data_samples(file_path):
    """
    Read an NDJSON (newline-delimited JSON) file.

    :param file_path: Path to the NDJSON file
    :return: List of parsed JSON objects, one per input line
    """
    # NDJSON: every line is an independent JSON document, so the whole
    # file can be parsed with a single comprehension.
    with open(file_path, "r", encoding="utf-8") as file:
        return [json.loads(line.strip()) for line in file]
20+
21+
22+
# Function to process the data
def parse_data(data_samples):
    """
    Flatten metric samples into a long-format DataFrame.

    :param data_samples: list of samples; each sample is a list of metric
        entries with a "key", a "value" of the form ``{"Gauge": x}`` or
        ``{"Counter": x}``, and optional "labels" as ``[[name, value], ...]``.
    :return: DataFrame with columns timestamp, key, worker, level, value, id.
    """
    records = []
    # The sample index doubles as the timestamp (one sample per scrape).
    for timestamp, sample in enumerate(data_samples):
        for entry in sample:
            key = entry["key"]
            # Handle Gauge and Counter values. Compare against None
            # explicitly: a gauge legitimately reporting 0.0 is falsy, so
            # `get("Gauge") or get("Counter")` would silently discard it.
            value = entry["value"].get("Gauge")
            if value is None:
                value = entry["value"].get("Counter")
            labels = {label[0]: label[1] for label in entry.get("labels", [])}  # Convert labels to dict

            # Extract necessary information; entries without a label fall
            # into a synthetic "Total" bucket.
            worker = labels.get("worker", "Total")
            level = labels.get("level", "Total")
            spine = labels.get("id", "Total")

            # Append processed entry
            records.append({
                "timestamp": timestamp,
                "key": key,
                "worker": worker,
                "level": level,
                "value": value,
                "id": spine
            })

    return pd.DataFrame(records)
47+
48+
49+
def make_plots(data_samples):
    """
    Render spine-merge metric plots from parsed metric samples.

    Writes three PNG files to the working directory:
    - ongoing_merges.png: min/avg/max of batches being merged, per level
    - batches_per_level.png: min/avg/max of loose batches, per level
    - pipeline_totals.png: disk write rate plus total batch counts over time

    :param data_samples: list of metric samples as returned by
        get_data_samples
    """
    # Convert the data
    df = parse_data(data_samples)

    # Filter for specific metrics
    df_merges = df[df["key"] == "spine.ongoing_merges"]
    df_merges_summary = df_merges.groupby(["timestamp", "level"])["value"].agg(
        ["mean", "max", "min"]).reset_index().melt(
        id_vars=["timestamp", "level"], var_name="stat", value_name="value")

    df_batches = df[df["key"] == "spine.batches_per_level"]
    df_batches_summary = df_batches.groupby(["timestamp", "level"])["value"].agg(
        ["mean", "max", "min"]).reset_index().melt(
        id_vars=["timestamp", "level"], var_name="stat", value_name="value")

    # Get bytes written at every time
    df_disk = df[df["key"] == "disk.total_bytes_written"]
    df_disk = df_disk.sort_values("timestamp")
    # diff() turns the cumulative byte counter into a per-interval delta (MiB).
    df_disk["value"] = df_disk["value"].diff() / (1024 * 1024)
    df_disk = df_disk.dropna()
    df_disk["metric"] = "Writes MiB/s"

    # Aggregate total values per timestamp
    df_merges_total = df_merges.groupby("timestamp")["value"].sum().reset_index()
    df_merges_total["worker"] = "Total"
    df_merges_total["level"] = "Total"
    df_merges_total["metric"] = "Current #Batches being merged"

    df_batches_total = df_batches.groupby("timestamp")["value"].sum().reset_index()
    df_batches_total["worker"] = "Total"
    df_batches_total["level"] = "Total"
    df_batches_total["metric"] = "Current #Batches not being merged"

    # Merge with original to include total lines
    df_merges = pd.concat([df_merges_summary])
    df_batches = pd.concat([df_batches_summary])

    df_totals = pd.concat([df_disk, df_merges_total, df_batches_total])

    # Plot function
    def create_plot(df, title, filename):
        plot = (
            ggplot(df, aes(x="timestamp", y="value", color="stat", group="stat"))
            + geom_line(size=1)
            + facet_wrap("~level", scales="free")
            + labs(title=title, x="Time", y="Value")
            + theme_classic()
            + scale_y_continuous(limits=(0, None))
        )
        plot.save(filename, width=12, height=6, dpi=300)
        # Bug fix: the f-string had no placeholder, so the saved file
        # name was never actually reported.
        print(f"Saved {filename}")

    # Generate plots
    create_plot(df_merges, "Current #Batches being merged (Min/Avg/Max from all Spines)", "ongoing_merges.png")
    create_plot(df_batches, "Current #Batches not being merged (Min/Avg/Max from all Spine)", "batches_per_level.png")

    plot = (
        ggplot(df_totals, aes(x="timestamp", y="value", color="metric"))
        + geom_line(size=1)
        + facet_grid("metric ~ .", scales="free_y")  # Separate plots for MiB/s and Counts
        + labs(title="Pipeline Totals", x="Time", y="Value")
        + theme_classic()
        + scale_y_continuous(limits=(0, None))
    )

    # Save the plot
    plot.save("pipeline_totals.png", width=12, height=8, dpi=300)
116+
117+
118+
if __name__ == '__main__':
    import sys

    # Expect the path to an NDJSON metrics dump as the sole argument;
    # fail with a usage message rather than an IndexError.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <metrics.ndjson>")

    samples = get_data_samples(sys.argv[1])
    make_plots(samples)

0 commit comments

Comments
 (0)