diff --git a/cbutil/ncu_parser.py b/cbutil/ncu_parser.py index 820ae26645979cceb04805ba6b7ed635a20c8472..327344e5d342f67a7344b3166a15c6ca71c7540b 100644 --- a/cbutil/ncu_parser.py +++ b/cbutil/ncu_parser.py @@ -1,5 +1,6 @@ import numpy as np import pandas as pd +import re from cbutil.ncu_keys import ( memory_write_data_key, memory_read_data_key, @@ -27,17 +28,17 @@ def get_unit(col): def detect_prefix(unit): - if unit[0] == 'G': + if unit[0] == "G": return 1e9 - elif unit[0] == 'M': + elif unit[0] == "M": return 1e6 - elif unit[0] == 'K': + elif unit[0] == "K": return 1e3 - elif unit[0] == 'm': + elif unit[0] == "m": return 1e-3 - elif unit[0] == 'u': + elif unit[0] == "u": return 1e-6 - elif unit[0] == 'n': + elif unit[0] == "n": return 1e-9 else: return 1 @@ -61,35 +62,57 @@ def normalize_and_add_prefix(value, prefix: str): def extract_raw_counter(df: pd.DataFrame): - fields = pd.DataFrame() tags = pd.DataFrame() tags["Block Size"] = df["Block Size"] tags["Grid Size"] = df["Grid Size"] - tags["GPU"] = df["device__attribute_display_name"] + tags["GPU"] = df["device__attribute_display_name"].str.replace(" ", "") - fields[memory_write_data_key] = normalize_and_add_prefix(df["dram__bytes_write.sum"], 'G') - fields[memory_read_data_key] = normalize_and_add_prefix(df["dram__bytes_read.sum"], 'G') - fields[memory_data_key] = fields[memory_write_data_key] + fields[memory_read_data_key] + fields[memory_write_data_key] = normalize_and_add_prefix( + df["dram__bytes_write.sum"], "G" + ) + fields[memory_read_data_key] = normalize_and_add_prefix( + df["dram__bytes_read.sum"], "G" + ) + fields[memory_data_key] = ( + fields[memory_write_data_key] + fields[memory_read_data_key] + ) - fields[memory_write_bandwidth_key] = normalize_and_add_prefix(df["dram__bytes_write.sum.per_second"], 'M') - fields[memory_read_bandwidth_key] = normalize_and_add_prefix(df["dram__bytes_read.sum.per_second"], 'M') - fields[memory_bandwidth_key] = normalize_and_add_prefix(df["dram__bytes.sum.per_second"], 'M') + fields[memory_write_bandwidth_key] = normalize_and_add_prefix( + df["dram__bytes_write.sum.per_second"], "M" + ) + fields[memory_read_bandwidth_key] = normalize_and_add_prefix( + df["dram__bytes_read.sum.per_second"], "M" + ) + fields[memory_bandwidth_key] = normalize_and_add_prefix( + df["dram__bytes.sum.per_second"], "M" + ) fields[runtime_key] = get_normalized(df["gpu__time_duration.sum"]) - fields[smsp_cycles_key] = get_normalized(df["smsp__cycles_elapsed.avg.per_second"]) + fields[smsp_cycles_key] = get_normalized( + df["smsp__cycles_elapsed.avg.per_second"]) fields[smsp_cycles_total_key] = fields[smsp_cycles_key] * fields[runtime_key] fields[fp_inst_per_cycle_key] = ( - 2 * df["smsp__sass_thread_inst_executed_op_dfma_pred_on.sum.per_cycle_elapsed"] + - df["smsp__sass_thread_inst_executed_op_dadd_pred_on.sum.per_cycle_elapsed"] + - df["smsp__sass_thread_inst_executed_op_dmul_pred_on.sum.per_cycle_elapsed"] + 2 * df["smsp__sass_thread_inst_executed_op_dfma_pred_on.sum.per_cycle_elapsed"] + + df["smsp__sass_thread_inst_executed_op_dadd_pred_on.sum.per_cycle_elapsed"] + + df["smsp__sass_thread_inst_executed_op_dmul_pred_on.sum.per_cycle_elapsed"] + ) + fields[total_fp_inst_key] = ( + fields[fp_inst_per_cycle_key] * fields[smsp_cycles_total_key] ) - fields[total_fp_inst_key] = fields[fp_inst_per_cycle_key] * fields[smsp_cycles_total_key] - fields[operational_intensity_key] = fields[total_fp_inst_key] / (fields[memory_data_key] * 1e9) - fields[p_max_key] = add_unit_prefix(fields[operational_intensity_key] * fields[memory_bandwidth_key] * 1e6, 'M') - fields[dp_key] = np.divide(np.asarray(fields[total_fp_inst_key]), fields[runtime_key]) / 1e6 + fields[operational_intensity_key] = fields[total_fp_inst_key] / ( + fields[memory_data_key] * 1e9 + ) + fields[p_max_key] = add_unit_prefix( + fields[operational_intensity_key] * + fields[memory_bandwidth_key] * 1e6, "M" + ) + fields[dp_key] = ( + np.divide(np.asarray(fields[total_fp_inst_key]), + fields[runtime_key]) / 1e6 + ) return fields, tags diff --git a/dashboards/dashboard_walberla.py b/dashboards/dashboard_walberla.py index 15358a910eb798876b4ce245b1c84d834287b5ca..bdbbe048dc8e46862244be682b9563044cc60b1d 100644 --- a/dashboards/dashboard_walberla.py +++ b/dashboards/dashboard_walberla.py @@ -618,6 +618,8 @@ def dashboard_fslbmgravitywave(): Filter("project_id", multi=True, default_value="walberla/walberla"), Filter("branch", multi=True, default_value="master"), Filter("numMPIProcesses"), + Filter("barrierAfterSweep"), + Filter("blockDecomposition"), ] fields = [ diff --git a/tests/test_ncu_parser.py b/tests/test_ncu_parser.py index ab38be61f32dab3c068cc8f27747b26e63cbcf3f..c49460e6b67dccedfbd9668dd29de0e44c69e87c 100644 --- a/tests/test_ncu_parser.py +++ b/tests/test_ncu_parser.py @@ -10,7 +10,7 @@ def sample_data(): data = { ("Block Size", ""): [128, 256, 512], ("Grid Size", ""): [64, 128, 256], - ("device__attribute_display_name", ""): ["GPU1", "GPU2", "GPU3"], + ("device__attribute_display_name", ""): [" GPU 1", "GPU2", "GPU 3"], ("dram__bytes_write.sum", "Bytes"): [1e9, 2e9, 3e9], ("dram__bytes_read.sum", "GBytes"): [0.5, 1., 1.5], ("dram__bytes_write.sum.per_second", "MByte/s"): [100, 200, 300],