Patch for cbutil/ncu_parser.py.

What this patch does:
  * adds `import re` (no line of this patch references it);
  * detect_prefix: single-quoted literals become double-quoted, logic unchanged;
  * extract_raw_counter: long statements are re-wrapped, and spaces are
    stripped from the GPU display name stored in tags["GPU"].

The GPU-name line goes through the `.str` accessor on purpose. A plain
Series.replace(" ", "") only substitutes cells whose entire value is " ",
so names that merely contain spaces would be left unchanged.

NOTE(review): this patch text was recovered from a whitespace-collapsed
copy. Blank context lines, continuation indentation, and the operator
placement in the re-wrapped dfma/dadd/dmul expression were re-inserted to
satisfy the hunk line counts; their exact form is inferred. Confirm
against the repository before applying.

diff --git a/cbutil/ncu_parser.py b/cbutil/ncu_parser.py
index 820ae26645979cceb04805ba6b7ed635a20c8472..9af5f27c88e2f7a36791de14cc79ba9fe99164c3 100644
--- a/cbutil/ncu_parser.py
+++ b/cbutil/ncu_parser.py
@@ -1,5 +1,6 @@
 import numpy as np
 import pandas as pd
+import re
 from cbutil.ncu_keys import (
     memory_write_data_key,
     memory_read_data_key,
@@ -27,17 +28,17 @@ def get_unit(col):
 
 
 def detect_prefix(unit):
-    if unit[0] == 'G':
+    if unit[0] == "G":
         return 1e9
-    elif unit[0] == 'M':
+    elif unit[0] == "M":
         return 1e6
-    elif unit[0] == 'K':
+    elif unit[0] == "K":
         return 1e3
-    elif unit[0] == 'm':
+    elif unit[0] == "m":
         return 1e-3
-    elif unit[0] == 'u':
+    elif unit[0] == "u":
         return 1e-6
-    elif unit[0] == 'n':
+    elif unit[0] == "n":
         return 1e-9
     else:
         return 1
@@ -61,35 +62,57 @@ def normalize_and_add_prefix(value, prefix: str):
 
 
 def extract_raw_counter(df: pd.DataFrame):
-
     fields = pd.DataFrame()
     tags = pd.DataFrame()
 
     tags["Block Size"] = df["Block Size"]
     tags["Grid Size"] = df["Grid Size"]
-    tags["GPU"] = df["device__attribute_display_name"]
+    tags["GPU"] = df["device__attribute_display_name"].str.replace(" ", "")
 
-    fields[memory_write_data_key] = normalize_and_add_prefix(df["dram__bytes_write.sum"], 'G')
-    fields[memory_read_data_key] = normalize_and_add_prefix(df["dram__bytes_read.sum"], 'G')
-    fields[memory_data_key] = fields[memory_write_data_key] + fields[memory_read_data_key]
+    fields[memory_write_data_key] = normalize_and_add_prefix(
+        df["dram__bytes_write.sum"], "G"
+    )
+    fields[memory_read_data_key] = normalize_and_add_prefix(
+        df["dram__bytes_read.sum"], "G"
+    )
+    fields[memory_data_key] = (
+        fields[memory_write_data_key] + fields[memory_read_data_key]
+    )
 
-    fields[memory_write_bandwidth_key] = normalize_and_add_prefix(df["dram__bytes_write.sum.per_second"], 'M')
-    fields[memory_read_bandwidth_key] = normalize_and_add_prefix(df["dram__bytes_read.sum.per_second"], 'M')
-    fields[memory_bandwidth_key] = normalize_and_add_prefix(df["dram__bytes.sum.per_second"], 'M')
+    fields[memory_write_bandwidth_key] = normalize_and_add_prefix(
+        df["dram__bytes_write.sum.per_second"], "M"
+    )
+    fields[memory_read_bandwidth_key] = normalize_and_add_prefix(
+        df["dram__bytes_read.sum.per_second"], "M"
+    )
+    fields[memory_bandwidth_key] = normalize_and_add_prefix(
+        df["dram__bytes.sum.per_second"], "M"
+    )
 
     fields[runtime_key] = get_normalized(df["gpu__time_duration.sum"])
-    fields[smsp_cycles_key] = get_normalized(df["smsp__cycles_elapsed.avg.per_second"])
+    fields[smsp_cycles_key] = get_normalized(
+        df["smsp__cycles_elapsed.avg.per_second"])
     fields[smsp_cycles_total_key] = fields[smsp_cycles_key] * fields[runtime_key]
 
     fields[fp_inst_per_cycle_key] = (
-        2 * df["smsp__sass_thread_inst_executed_op_dfma_pred_on.sum.per_cycle_elapsed"] +
-        df["smsp__sass_thread_inst_executed_op_dadd_pred_on.sum.per_cycle_elapsed"] +
-        df["smsp__sass_thread_inst_executed_op_dmul_pred_on.sum.per_cycle_elapsed"]
+        2 * df["smsp__sass_thread_inst_executed_op_dfma_pred_on.sum.per_cycle_elapsed"]
+        + df["smsp__sass_thread_inst_executed_op_dadd_pred_on.sum.per_cycle_elapsed"]
+        + df["smsp__sass_thread_inst_executed_op_dmul_pred_on.sum.per_cycle_elapsed"]
+    )
+    fields[total_fp_inst_key] = (
+        fields[fp_inst_per_cycle_key] * fields[smsp_cycles_total_key]
     )
-    fields[total_fp_inst_key] = fields[fp_inst_per_cycle_key] * fields[smsp_cycles_total_key]
-    fields[operational_intensity_key] = fields[total_fp_inst_key] / (fields[memory_data_key] * 1e9)
-    fields[p_max_key] = add_unit_prefix(fields[operational_intensity_key] * fields[memory_bandwidth_key] * 1e6, 'M')
-    fields[dp_key] = np.divide(np.asarray(fields[total_fp_inst_key]), fields[runtime_key]) / 1e6
+    fields[operational_intensity_key] = fields[total_fp_inst_key] / (
+        fields[memory_data_key] * 1e9
+    )
+    fields[p_max_key] = add_unit_prefix(
+        fields[operational_intensity_key] *
+        fields[memory_bandwidth_key] * 1e6, "M"
+    )
+    fields[dp_key] = (
+        np.divide(np.asarray(fields[total_fp_inst_key]),
+                  fields[runtime_key]) / 1e6
+    )
 
     return fields, tags
 