diff --git a/cbutil/ncu_keys.py b/cbutil/ncu_keys.py new file mode 100644 index 0000000000000000000000000000000000000000..ef78f52d1f73584a0382bde256ad28248c450e39 --- /dev/null +++ b/cbutil/ncu_keys.py @@ -0,0 +1,14 @@ +memory_write_data_key = "Memory write data volume [GBytes]" +memory_read_data_key = "Memory read data volume [GBytes]" +memory_data_key = "Memory data volume [GBytes]" +memory_write_bandwidth_key = "Memory write bandwidth [MByte/s]" +memory_read_bandwidth_key = "Memory read bandwidth [MByte/s]" +memory_bandwidth_key = "Memory bandwidth [MByte/s]" +runtime_key = "Runtime [s]" +smsp_cycles_key = "SMSP Cycles [Cycles/s]" +smsp_cycles_total_key = "SMSP Cycles" +fp_inst_per_cycle_key = "FP inst per cycle" +total_fp_inst_key = "Total FP inst" +operational_intensity_key = "Operational intensity" +p_max_key = "P_max [MFlop/s]" +dp_key = "DP [MFlop/s]" diff --git a/cbutil/ncu_parser.py b/cbutil/ncu_parser.py index 87b3280d7cc1ab3e4e03fff83092a3d41cecff76..820ae26645979cceb04805ba6b7ed635a20c8472 100644 --- a/cbutil/ncu_parser.py +++ b/cbutil/ncu_parser.py @@ -1,5 +1,21 @@ import numpy as np import pandas as pd +from cbutil.ncu_keys import ( + memory_write_data_key, + memory_read_data_key, + memory_data_key, + memory_write_bandwidth_key, + memory_read_bandwidth_key, + memory_bandwidth_key, + runtime_key, + smsp_cycles_key, + smsp_cycles_total_key, + fp_inst_per_cycle_key, + total_fp_inst_key, + operational_intensity_key, + p_max_key, + dp_key, +) def parse_ncu_csv(file_name) -> pd.DataFrame: @@ -53,27 +69,27 @@ def extract_raw_counter(df: pd.DataFrame): tags["Grid Size"] = df["Grid Size"] tags["GPU"] = df["device__attribute_display_name"] - fields["Memory write data volume [GBytes]"] = normalize_and_add_prefix(df["dram__bytes_write.sum"], 'G') - fields["Memory read data volume [GBytes]"] = normalize_and_add_prefix(df["dram__bytes_read.sum"], 'G') - fields["Memory data volume [GBytes]"] = fields["Memory write data volume [GBytes]"] + \ - fields["Memory read 
data volume [GBytes]"] + fields[memory_write_data_key] = normalize_and_add_prefix(df["dram__bytes_write.sum"], 'G') + fields[memory_read_data_key] = normalize_and_add_prefix(df["dram__bytes_read.sum"], 'G') + fields[memory_data_key] = fields[memory_write_data_key] + fields[memory_read_data_key] - fields["Memory write bandwidth [MByte/s]"] = normalize_and_add_prefix(df["dram__bytes_write.sum.per_second"], 'M') - fields["Memory read bandwidth [MByte/s]"] = normalize_and_add_prefix(df["dram__bytes_read.sum.per_second"], 'M') - fields["Memory bandwidth [MByte/s]"] = normalize_and_add_prefix(df["dram__bytes.sum.per_second"], 'M') - fields["Runtime [s]"] = get_normalized(df["gpu__time_duration.sum"]) + fields[memory_write_bandwidth_key] = normalize_and_add_prefix(df["dram__bytes_write.sum.per_second"], 'M') + fields[memory_read_bandwidth_key] = normalize_and_add_prefix(df["dram__bytes_read.sum.per_second"], 'M') + fields[memory_bandwidth_key] = normalize_and_add_prefix(df["dram__bytes.sum.per_second"], 'M') + fields[runtime_key] = get_normalized(df["gpu__time_duration.sum"]) - fields["SMSP Cycles [Cycles/s]"] = get_normalized(df["smsp__cycles_elapsed.avg.per_second"]) - fields["SMSP Cycles"] = fields["SMSP Cycles [Cycles/s]"] * fields["Runtime [s]"] - fields["FP inst per cycle"] = 2 * df["smsp__sass_thread_inst_executed_op_dfma_pred_on.sum.per_cycle_elapsed"] + \ - df["smsp__sass_thread_inst_executed_op_dadd_pred_on.sum.per_cycle_elapsed"] + \ + fields[smsp_cycles_key] = get_normalized(df["smsp__cycles_elapsed.avg.per_second"]) + fields[smsp_cycles_total_key] = fields[smsp_cycles_key] * fields[runtime_key] + fields[fp_inst_per_cycle_key] = ( + 2 * df["smsp__sass_thread_inst_executed_op_dfma_pred_on.sum.per_cycle_elapsed"] + + df["smsp__sass_thread_inst_executed_op_dadd_pred_on.sum.per_cycle_elapsed"] + df["smsp__sass_thread_inst_executed_op_dmul_pred_on.sum.per_cycle_elapsed"] - fields["Total FP inst"] = fields["FP inst per cycle"] * fields["SMSP Cycles"] + ) + 
fields[total_fp_inst_key] = fields[fp_inst_per_cycle_key] * fields[smsp_cycles_total_key] - fields["Operational intensity"] = fields["Total FP inst"] / (fields["Memory data volume [GBytes]"] * 1e9) - fields["P_max [MFlop/s]"] = add_unit_prefix(fields["Operational intensity"] - * fields["Memory bandwidth [MByte/s]"] * 1e6, 'M') - fields["DP [MFlop/s]"] = np.divide(np.asarray(fields["Total FP inst"]), fields["Runtime [s]"]) / 1e6 + fields[operational_intensity_key] = fields[total_fp_inst_key] / (fields[memory_data_key] * 1e9) + fields[p_max_key] = add_unit_prefix(fields[operational_intensity_key] * fields[memory_bandwidth_key] * 1e6, 'M') + fields[dp_key] = np.divide(np.asarray(fields[total_fp_inst_key]), fields[runtime_key]) / 1e6 return fields, tags diff --git a/dashboards/dashboard_pystencils.py b/dashboards/dashboard_pystencils.py index 3d4f7c7fb3b3460dd5ea8b3947a9ea03ffbfda96..67c2f85ecda0e55e6e1efd44946a9d2ba8c5a1e0 100644 --- a/dashboards/dashboard_pystencils.py +++ b/dashboards/dashboard_pystencils.py @@ -11,6 +11,19 @@ from dashboards.variables import get_dashboard_variable, Filter, get_measurement from dashboards.influx_queries import join_variable_and from dashboards.legends import Units +from cbutil.ncu_keys import ( + memory_write_data_key, + memory_read_data_key, + memory_data_key, + memory_write_bandwidth_key, + memory_read_bandwidth_key, + memory_bandwidth_key, + runtime_key, + operational_intensity_key, + p_max_key, + dp_key, +) + INTEL_LINESTYLE = "solid" GCC_LINESTYLE = "dashed" @@ -75,3 +88,64 @@ def dashboard_pystencils_cpu(): rows=[row], templating=[*filter_vars, benchmark], annotations=annotations) + + +def dashboard_pystencils_gpu(): + data_source = "pystencils" + row_repeat = "host" + options = DashboardOptions( + title="pystencils GPU Benchmarks", + description="Benchmarks for pystencils", + tags=['benchmark', 'pystencils', 'GPU'], + timezone="browser", + ) + + filters = [ + Filter("host", default_value="medusa"), + 
Filter("PYSTENCILS_PROJECT_ID", default_value="pycodegen/pystencils"), + Filter("PYSTENCILS_BRANCH", default_value="master"), + Filter("GPU"), + ] + + fields = [PanelInfos(runtime_key, Units.seconds), + PanelInfos(dp_key, Units.mflop_sec), + PanelInfos(p_max_key, Units.mflop_sec), + PanelInfos(f'{dp_key}"/"{p_max_key}', Units.percent), + PanelInfos(operational_intensity_key, Units.flop_per_byte), + PanelInfos(memory_bandwidth_key, Units.mbytes_per_second), + PanelInfos(memory_write_bandwidth_key, Units.mbytes_per_second), + PanelInfos(memory_read_bandwidth_key, Units.mbytes_per_second), + PanelInfos(memory_data_key, Units.gigabyte), + PanelInfos(memory_write_data_key, Units.gigabyte), + PanelInfos(memory_read_data_key, Units.gigabyte), + ] + + filter_vars = [get_dashboard_variable(filter, "", data_source) for filter in filters] + benchmark = get_measurement_filter("benchmark", data_source, filter_pattern="_gpu$") + + row_repeat_var = [fv for fv in filter_vars if fv.name == row_repeat][0] + + where = join_variable_and([f.name for f in filters]) + annotations = get_commit_annotation(data_source, "red", "commits", "vadd_gpu", commit_key="pystencils-commit") + group_by = [f.name for f in filters] + group_by.append("array_shape") + + panels = [ + get_time_series_panel( + field, + data_source, + f"/^${benchmark.name}$/", + where=where, + group_by=group_by, + ) + for field in fields] + + row = pack_in_row( + title=f"{row_repeat}: ${row_repeat_var.name}", + panels=[*panels], + repeat=Repeat('v', row_repeat_var.name), + ) + return build_dashboard(options, + rows=[row], + templating=[*filter_vars, benchmark], + annotations=annotations) diff --git a/dashboards/deploy.py b/dashboards/deploy.py index be0800978ff2c672ee1ec8d0bceb0ee4d8704ed9..ab55de7f8751dee5996db421321922c63591a738 100644 --- a/dashboards/deploy.py +++ b/dashboards/deploy.py @@ -4,7 +4,7 @@ import logging import dashboards.dashboard_list as boards from dashboards.upload import upload_dashboard from 
dashboards.dashboard_fe2ti import dashboard_fe2ti -from dashboard_pystencils import dashboard_pystencils_cpu +from dashboards.dashboard_pystencils import dashboard_pystencils_cpu, dashboard_pystencils_gpu from dashboard_walberla import dashboard_uniformgridcpu, dashboard_uniformgridgpu logger = logging.getLogger(__file__) @@ -40,6 +40,7 @@ def main(): upload_dashboard(dashboard_fe2ti(), folder=fe2ti_folder) elif "pystencils" in board_name: upload_dashboard(dashboard_pystencils_cpu(), folder=pystencils_foler) + upload_dashboard(dashboard_pystencils_gpu(), folder=pystencils_foler) elif "walberla" in board_name: upload_dashboard(dashboard_uniformgridcpu(), folder=walberla_folder) upload_dashboard(dashboard_uniformgridgpu(), folder=walberla_folder) diff --git a/tests/test_dashboard_creation.py b/tests/test_dashboard_creation.py index 9acea28ba9313b166a423fadf11aa74828fc73a2..51030173a31f99a9feffc9e6143acbef6a6d71ca 100644 --- a/tests/test_dashboard_creation.py +++ b/tests/test_dashboard_creation.py @@ -6,7 +6,7 @@ from dashboards.dashboard_base import (get_commit_annotation, get_influx_target) from dashboards.dashboard_list import dashboard_uniformGridGPU from dashboards.dashboard_fe2ti import dashboard_fe2ti -from dashboards.dashboard_pystencils import dashboard_pystencils_cpu +from dashboards.dashboard_pystencils import dashboard_pystencils_cpu, dashboard_pystencils_gpu from dashboards.dashboard_walberla import dashboard_uniformgridgpu, dashboard_uniformgridcpu from dashboards.influx_queries import Query, show_tag_values @@ -90,6 +90,7 @@ def test_dashboard_fe2ti(): def test_dashboard_pystencils_cpu(): dashboard_pystencils_cpu() + dashboard_pystencils_gpu() def test_dashboard_walberla():