import numpy as np
import pandas as pd

# Scale factors for the metric prefixes that appear in NCU unit strings
# (e.g. "Gbyte", "Mbyte/s", "msecond").  NCU uses an uppercase 'K' for kilo.
_PREFIX_FACTORS = {
    'G': 1e9,
    'M': 1e6,
    'K': 1e3,
    'm': 1e-3,
    'u': 1e-6,
    'n': 1e-9,
}


def parse_ncu_csv(file_name) -> pd.DataFrame:
    """Read an Nsight Compute CSV export.

    The export has two header rows: row 0 holds the metric names and row 1
    the unit strings, so the result has two-level (name, unit) columns.
    """
    return pd.read_csv(file_name, header=[0, 1])


def get_unit(col):
    """Return the unit string of a single-metric sub-frame.

    Selecting one metric from the two-level frame leaves a one-column
    DataFrame whose sole column label is the unit (e.g. "Gbyte").
    """
    return col.columns[0]


def detect_prefix(unit):
    """Return the scale factor implied by the metric prefix of *unit*.

    Unknown, dimensionless, or empty unit strings map to a factor of 1.
    (The previous implementation indexed ``unit[0]`` and raised IndexError
    on the empty unit strings NCU emits for dimensionless counters.)
    """
    return _PREFIX_FACTORS.get(unit[:1], 1)


def get_normalized(col):
    """Scale *col* to base units (bytes, seconds, ...) using its unit prefix."""
    unit = get_unit(col)
    if "/" in unit:
        # Rate unit such as "Mbyte/s": numerator prefix over denominator prefix.
        factor = np.divide(*[detect_prefix(u) for u in unit.split("/")])
    else:
        factor = detect_prefix(unit)
    return factor * col


def add_unit_prefix(value, prefix: str):
    """Express a base-unit *value* in *prefix* units, e.g. bytes -> GBytes with 'G'."""
    return value / detect_prefix(prefix)


def normalize_and_add_prefix(value, prefix: str):
    """Normalize *value* to base units, then rescale it to *prefix* units."""
    return add_unit_prefix(get_normalized(value), prefix)


def extract_raw_counter(df: pd.DataFrame):
    """Derive roofline-style metrics from a parsed NCU counter frame.

    Parameters
    ----------
    df : DataFrame with two-level (metric name, unit) columns as produced by
        :func:`parse_ncu_csv`.

    Returns
    -------
    (fields, tags) : pair of DataFrames — ``fields`` holds derived numeric
        metrics, ``tags`` the launch configuration and device metadata.
    """
    fields = pd.DataFrame()
    tags = pd.DataFrame()

    tags["Block Size"] = df["Block Size"]
    tags["Grid Size"] = df["Grid Size"]
    tags["GPU"] = df["device__attribute_display_name"]

    fields["Memory write data volume [GBytes]"] = normalize_and_add_prefix(df["dram__bytes_write.sum"], 'G')
    fields["Memory read data volume [GBytes]"] = normalize_and_add_prefix(df["dram__bytes_read.sum"], 'G')
    fields["Memory data volume [GBytes]"] = fields["Memory write data volume [GBytes]"] + \
        fields["Memory read data volume [GBytes]"]

    fields["Memory write bandwidth [MByte/s]"] = normalize_and_add_prefix(df["dram__bytes_write.sum.per_second"], 'M')
    fields["Memory read bandwidth [MByte/s]"] = normalize_and_add_prefix(df["dram__bytes_read.sum.per_second"], 'M')
    fields["Memory bandwidth [MByte/s]"] = normalize_and_add_prefix(df["dram__bytes.sum.per_second"], 'M')

    fields["Runtime [s]"] = get_normalized(df["gpu__time_duration.sum"])

    fields["SMSP Cycles [Cycles/s]"] = get_normalized(df["smsp__cycles_elapsed.avg.per_second"])
    fields["SMSP Cycles"] = fields["SMSP Cycles [Cycles/s]"] * fields["Runtime [s]"]
    # An FMA instruction counts as two floating-point operations.
    fields["FP inst per cycle"] = 2 * df["smsp__sass_thread_inst_executed_op_dfma_pred_on.sum.per_cycle_elapsed"] + \
        df["smsp__sass_thread_inst_executed_op_dadd_pred_on.sum.per_cycle_elapsed"] + \
        df["smsp__sass_thread_inst_executed_op_dmul_pred_on.sum.per_cycle_elapsed"]
    fields["Total FP inst"] = fields["FP inst per cycle"] * fields["SMSP Cycles"]

    # Flop per byte: total operations over the data volume converted back to bytes.
    fields["Operational intensity"] = fields["Total FP inst"] / (fields["Memory data volume [GBytes]"] * 1e9)
    fields["P_max [MFlop/s]"] = add_unit_prefix(fields["Operational intensity"]
                                                * fields["Memory bandwidth [MByte/s]"] * 1e6, 'M')
    fields["DP [MFlop/s]"] = np.divide(np.asarray(fields["Total FP inst"]), fields["Runtime [s]"]) / 1e6
    return fields, tags


def tail_to_dict(df):
    """Return the last row of *df* as a plain dict keyed by column name."""
    return df.tail(1).iloc[0].to_dict()


def extract_from_csv(file_name: str):
    """Parse an NCU CSV file and return (fields, tags) dicts for its last row."""
    fields, tags = extract_raw_counter(parse_ncu_csv(file_name))
    return tail_to_dict(fields), tail_to_dict(tags)
@pytest.fixture
def sample_data():
    """Two-level-header frame mimicking the output of ``parse_ncu_csv``."""
    raw = {
        ("Block Size", ""): [128, 256, 512],
        ("Grid Size", ""): [64, 128, 256],
        ("device__attribute_display_name", ""): ["GPU1", "GPU2", "GPU3"],
        ("dram__bytes_write.sum", "Bytes"): [1e9, 2e9, 3e9],
        ("dram__bytes_read.sum", "GBytes"): [0.5, 1., 1.5],
        ("dram__bytes_write.sum.per_second", "MByte/s"): [100, 200, 300],
        ("dram__bytes_read.sum.per_second", "MByte/s"): [50, 100, 150],
        ("dram__bytes.sum.per_second", "MByte/s"): [150, 300, 450],
        ("gpu__time_duration.sum", "msecond"): [10, 20, 30],
        ("smsp__cycles_elapsed.avg.per_second", "Cycles/s"): [1000, 2000, 3000],
        ("smsp__sass_thread_inst_executed_op_dfma_pred_on.sum.per_cycle_elapsed", ""): [500, 1000, 1500],
        ("smsp__sass_thread_inst_executed_op_dadd_pred_on.sum.per_cycle_elapsed", ""): [0, 0, 0],
        ("smsp__sass_thread_inst_executed_op_dmul_pred_on.sum.per_cycle_elapsed", ""): [0, 0, 0],
    }
    return pd.DataFrame(raw)


def test_extract_raw_counter(sample_data):
    fields, tags = extract_raw_counter(sample_data)

    # The fields frame exposes exactly these derived metrics, in this order.
    expected_field_columns = [
        "Memory write data volume [GBytes]",
        "Memory read data volume [GBytes]",
        "Memory data volume [GBytes]",
        "Memory write bandwidth [MByte/s]",
        "Memory read bandwidth [MByte/s]",
        "Memory bandwidth [MByte/s]",
        "Runtime [s]",
        "SMSP Cycles [Cycles/s]",
        "SMSP Cycles",
        "FP inst per cycle",
        "Total FP inst",
        "Operational intensity",
        "P_max [MFlop/s]",
        "DP [MFlop/s]",
    ]
    assert list(fields.columns) == expected_field_columns
    assert fields.shape == (3, 14)

    # Spot-check the normalized data volumes against hand-computed values.
    expected_values = {
        "Memory write data volume [GBytes]": [1.0, 2.0, 3.0],
        "Memory read data volume [GBytes]": [0.5, 1.0, 1.5],
        "Memory data volume [GBytes]": [1.5, 3.0, 4.5],
    }
    for column, values in expected_values.items():
        assert fields[column].tolist() == values
    # Add more checks for other columns in the fields DataFrame

    # The tags frame carries launch configuration and device metadata verbatim.
    assert list(tags.columns) == ["Block Size", "Grid Size", "GPU"]
    assert tags.shape == (3, 3)
    assert tags["Block Size"].tolist() == [128, 256, 512]
    assert tags["Grid Size"].tolist() == [64, 128, 256]
    assert tags["GPU"].tolist() == ["GPU1", "GPU2", "GPU3"]