[no main fork] Refactor post-processing script to add command-line arguments...

[no main fork] Refactor post-processing script to add command-line arguments for database input and verbose output

[no main fork] Refactor post-processing script to add command-line arguments...
07ecad0f · Michael Zikeli · f9a339c6 · 07ecad0f
Commit 07ecad0f authored 3 months ago by Michael Zikeli
--- a/post_processing/investigateBenchmarkingTimerOutput.py
+++ b/post_processing/investigateBenchmarkingTimerOutput.py
 # %%
 import sqlite3
 import pandas as pd
-import os
-import matplotlib.pyplot as plt
 import warnings
+import argparse
-# %%
+def main(databasename: str, verbose: bool) -> None:
-# Define colors
+    """
-color1 = "#1F3B66"
+    Main function to process the benchmarking timer output from the SQLite database.
-color2 = "#C14C30"
-color3 = "#4AA7A9"
-color4 = "#EBAE65"
-color5 = "#EBAE65"
-color6 = "#36b700"
-color7 = "#9d2c00"
-color8 = "#c8c8c8"
-# %%
+
+    Reads every table listed in ``sqlite_master``, concatenates them into one
+    dataframe, and prints a table of timer values (timer names as rows,
+    properties as columns). The timer values are taken from the first row of
+    the last table that was read. If the database contains no tables, a
+    warning is issued and nothing is printed.
+
+    Args:
-# Database configuration
+        databasename (str): Path to the SQLite database file.
-GPUs_per_node = 4  # MareNostrum5 ACC has four GPUs per node
+        verbose (bool): Flag to enable verbose output.
-databasename = "/local/ab04unyc/mywalberla/build/benchmark-debug/apps/benchmarks/UniformGridCPU/cpu_benchmark.sqlite3"
+    """
+    # Database configuration
+    GPUs_per_node = 4  # MareNostrum5 ACC has four GPUs per node
-# Connect to the database
+    # Connect to the database
-conn = sqlite3.connect(databasename)
+    conn = sqlite3.connect(databasename)
+    try:
-cursor = conn.cursor()
+        cursor = conn.cursor()
-cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
+        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
-all_tables = cursor.fetchall()
+        all_tables = cursor.fetchall()
-# Extract table names
+        # Extract table names
-all_table_names = [str(t[0]) for t in all_tables]
+        all_table_names = [str(t[0]) for t in all_tables]
-# %%
+        # Debug output
-# Debug output
+        if verbose:
-all_table_names
+            print("Available table names in the database:", all_table_names)
-# %%
+        # Load data from all tables
-# Load data from all tables
+        frames = [
-for table_name in all_table_names:
+            pd.read_sql(f"SELECT * FROM {table_name}", conn)
-    df = pd.read_sql(f"SELECT * FROM {table_name}", conn)
+            for table_name in all_table_names
-    if 'full_df' in globals():
+        ]
-        full_df = pd.concat([full_df, df], ignore_index=True)
+    finally:
-    else:
+        # Release the database handle even if a query fails
-        full_df = df.copy()
+        conn.close()
+    if not frames:
+        # Without any table there is nothing to group; avoid an unbound-variable crash below
+        warnings.warn(f"No tables found in {databasename}")
+        return
+    # full_df is a local variable inside main(), so it cannot be detected via
+    # globals(); concatenate all loaded tables in one step instead.
+    full_df = pd.concat(frames, ignore_index=True)
+    # The timer extraction below works on the last table that was read
+    df = frames[-1]
-# Debug output
+    # Debug output
-df.columns
+    if verbose:
+        print("Available columns in the database:", df.columns)
+        print(f"Number of rows in {all_table_names}: {len(full_df)}")
-# %%
+    # Extract unique timer names and properties
-# Extract unique timer names and properties
+    # (column names are split on '_': Timer_<name>_<property>; sorted for a stable output order)
+    timer_columns = df.filter(like='Timer')
-timer_columns = df.filter(like='Timer')
+    timer_names = sorted({col.split('_')[1] for col in timer_columns.columns})
-timer_names = list(set(col.split('_')[1] for col in timer_columns.columns))
+    properties = sorted({col.split('_')[2] for col in timer_columns.columns})
-properties = list(set(col.split('_')[2] for col in timer_columns.columns))
+    # Create a new dataframe with timer names as rows and properties as columns
+    grouped_timers = pd.DataFrame(columns=properties, index=timer_names)
-# Create a new dataframe with properties as rows and timer names as columns
+    # Fill the new dataframe with the corresponding values
-grouped_timers = pd.DataFrame(columns=properties, index=timer_names)
+    for timer in timer_names:
+        for prop in properties:
+            timer_name = f'Timer_{timer}_{prop}'
+            try:
+                grouped_timers.at[timer, prop] = timer_columns[timer_name].iloc[0]
+            except KeyError:
+                # Not every timer/property combination has to exist as a column
+                warnings.warn(f"{timer_name} not found in timer_columns")
+    # Display the grouped timers
+    if verbose:
+        print(f"Processing database file: {databasename}")
+    else:
+        print(f"Processing database file: {databasename.split('/')[-1]}")
+    print(grouped_timers)
-# Fill the new dataframe with the corresponding values
+if __name__ == "__main__":
-for timer in timer_names:
+    # Command-line interface: one positional database path and an optional verbosity switch
+    cli = argparse.ArgumentParser(
-    for prop in properties:
+        description="Process benchmarking timer output from an SQLite database."
-        timer_name = f'Timer_{timer}_{prop}'
+    )
-        try:
-            grouped_timers.at[timer, prop] = timer_columns[timer_name].iloc[0]
+    cli.add_argument("databasename", type=str, help="Path to the SQLite database file")
-        except KeyError:
-            warnings.warn(f"{timer_name} not found in timer_columns")
+    cli.add_argument("-v", "--verbose", action="store_true", help="Enable verbose output")
-# Display the grouped timers
+    options = cli.parse_args()
+    # Hand the parsed options to the processing routine
+    main(options.databasename, options.verbose)
-grouped_timers
\ No newline at end of file
\ No newline at end of file