diff --git a/cbutil/ncu_keys.py b/cbutil/ncu_keys.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef78f52d1f73584a0382bde256ad28248c450e39
--- /dev/null
+++ b/cbutil/ncu_keys.py
@@ -0,0 +1,14 @@
+memory_write_data_key = "Memory write data volume [GBytes]"
+memory_read_data_key = "Memory read data volume [GBytes]"
+memory_data_key = "Memory data volume [GBytes]"
+memory_write_bandwidth_key = "Memory write bandwidth [MByte/s]"
+memory_read_bandwidth_key = "Memory read bandwidth [MByte/s]"
+memory_bandwidth_key = "Memory bandwidth [MByte/s]"
+runtime_key = "Runtime [s]"
+smsp_cycles_key = "SMSP Cycles [Cycles/s]"
+smsp_cycles_total_key = "SMSP Cycles"
+fp_inst_per_cycle_key = "FP inst per cycle"
+total_fp_inst_key = "Total FP inst"
+operational_intensity_key = "Operational intensity"
+p_max_key = "P_max [MFlop/s]"
+dp_key = "DP [MFlop/s]"
diff --git a/cbutil/ncu_parser.py b/cbutil/ncu_parser.py
index 87b3280d7cc1ab3e4e03fff83092a3d41cecff76..820ae26645979cceb04805ba6b7ed635a20c8472 100644
--- a/cbutil/ncu_parser.py
+++ b/cbutil/ncu_parser.py
@@ -1,5 +1,21 @@
 import numpy as np
 import pandas as pd
+from cbutil.ncu_keys import (
+    memory_write_data_key,
+    memory_read_data_key,
+    memory_data_key,
+    memory_write_bandwidth_key,
+    memory_read_bandwidth_key,
+    memory_bandwidth_key,
+    runtime_key,
+    smsp_cycles_key,
+    smsp_cycles_total_key,
+    fp_inst_per_cycle_key,
+    total_fp_inst_key,
+    operational_intensity_key,
+    p_max_key,
+    dp_key,
+)
 
 
 def parse_ncu_csv(file_name) -> pd.DataFrame:
@@ -53,27 +69,27 @@ def extract_raw_counter(df: pd.DataFrame):
     tags["Grid Size"] = df["Grid Size"]
     tags["GPU"] = df["device__attribute_display_name"]
 
-    fields["Memory write data volume [GBytes]"] = normalize_and_add_prefix(df["dram__bytes_write.sum"], 'G')
-    fields["Memory read data volume [GBytes]"] = normalize_and_add_prefix(df["dram__bytes_read.sum"], 'G')
-    fields["Memory data volume [GBytes]"] = fields["Memory write data volume [GBytes]"] + \
-        fields["Memory read data volume [GBytes]"]
+    fields[memory_write_data_key] = normalize_and_add_prefix(df["dram__bytes_write.sum"], 'G')
+    fields[memory_read_data_key] = normalize_and_add_prefix(df["dram__bytes_read.sum"], 'G')
+    fields[memory_data_key] = fields[memory_write_data_key] + fields[memory_read_data_key]
 
-    fields["Memory write bandwidth [MByte/s]"] = normalize_and_add_prefix(df["dram__bytes_write.sum.per_second"], 'M')
-    fields["Memory read bandwidth [MByte/s]"] = normalize_and_add_prefix(df["dram__bytes_read.sum.per_second"], 'M')
-    fields["Memory bandwidth [MByte/s]"] = normalize_and_add_prefix(df["dram__bytes.sum.per_second"], 'M')
-    fields["Runtime [s]"] = get_normalized(df["gpu__time_duration.sum"])
+    fields[memory_write_bandwidth_key] = normalize_and_add_prefix(df["dram__bytes_write.sum.per_second"], 'M')
+    fields[memory_read_bandwidth_key] = normalize_and_add_prefix(df["dram__bytes_read.sum.per_second"], 'M')
+    fields[memory_bandwidth_key] = normalize_and_add_prefix(df["dram__bytes.sum.per_second"], 'M')
+    fields[runtime_key] = get_normalized(df["gpu__time_duration.sum"])
 
-    fields["SMSP Cycles [Cycles/s]"] = get_normalized(df["smsp__cycles_elapsed.avg.per_second"])
-    fields["SMSP Cycles"] = fields["SMSP Cycles [Cycles/s]"] * fields["Runtime [s]"]
-    fields["FP inst per cycle"] = 2 * df["smsp__sass_thread_inst_executed_op_dfma_pred_on.sum.per_cycle_elapsed"] + \
-        df["smsp__sass_thread_inst_executed_op_dadd_pred_on.sum.per_cycle_elapsed"] + \
+    fields[smsp_cycles_key] = get_normalized(df["smsp__cycles_elapsed.avg.per_second"])
+    fields[smsp_cycles_total_key] = fields[smsp_cycles_key] * fields[runtime_key]
+    fields[fp_inst_per_cycle_key] = (
+        2 * df["smsp__sass_thread_inst_executed_op_dfma_pred_on.sum.per_cycle_elapsed"] +
+        df["smsp__sass_thread_inst_executed_op_dadd_pred_on.sum.per_cycle_elapsed"] +
         df["smsp__sass_thread_inst_executed_op_dmul_pred_on.sum.per_cycle_elapsed"]
-    fields["Total FP inst"] = fields["FP inst per cycle"] * fields["SMSP Cycles"]
+    )
+    fields[total_fp_inst_key] = fields[fp_inst_per_cycle_key] * fields[smsp_cycles_total_key]
 
-    fields["Operational intensity"] = fields["Total FP inst"] / (fields["Memory data volume [GBytes]"] * 1e9)
-    fields["P_max [MFlop/s]"] = add_unit_prefix(fields["Operational intensity"]
-                                                * fields["Memory bandwidth [MByte/s]"] * 1e6, 'M')
-    fields["DP [MFlop/s]"] = np.divide(np.asarray(fields["Total FP inst"]), fields["Runtime [s]"]) / 1e6
+    fields[operational_intensity_key] = fields[total_fp_inst_key] / (fields[memory_data_key] * 1e9)
+    fields[p_max_key] = add_unit_prefix(fields[operational_intensity_key] * fields[memory_bandwidth_key] * 1e6, 'M')
+    fields[dp_key] = np.divide(np.asarray(fields[total_fp_inst_key]), fields[runtime_key]) / 1e6
     return fields, tags
 
 
diff --git a/dashboards/dashboard_pystencils.py b/dashboards/dashboard_pystencils.py
index 3d4f7c7fb3b3460dd5ea8b3947a9ea03ffbfda96..67c2f85ecda0e55e6e1efd44946a9d2ba8c5a1e0 100644
--- a/dashboards/dashboard_pystencils.py
+++ b/dashboards/dashboard_pystencils.py
@@ -11,6 +11,19 @@ from dashboards.variables import get_dashboard_variable, Filter, get_measurement
 from dashboards.influx_queries import join_variable_and
 from dashboards.legends import Units
 
+from cbutil.ncu_keys import (
+    memory_write_data_key,
+    memory_read_data_key,
+    memory_data_key,
+    memory_write_bandwidth_key,
+    memory_read_bandwidth_key,
+    memory_bandwidth_key,
+    runtime_key,
+    operational_intensity_key,
+    p_max_key,
+    dp_key,
+)
+
 INTEL_LINESTYLE = "solid"
 GCC_LINESTYLE = "dashed"
 
@@ -75,3 +88,64 @@ def dashboard_pystencils_cpu():
                            rows=[row],
                            templating=[*filter_vars, benchmark],
                            annotations=annotations)
+
+
+def dashboard_pystencils_gpu():
+    data_source = "pystencils"
+    row_repeat = "host"
+    options = DashboardOptions(
+        title="pystencils GPU Benchmarks",
+        description="Benchmarks for pystencils",
+        tags=['benchmark', 'pystencils', 'GPU'],
+        timezone="browser",
+    )
+
+    filters = [
+        Filter("host", default_value="medusa"),
+        Filter("PYSTENCILS_PROJECT_ID", default_value="pycodegen/pystencils"),
+        Filter("PYSTENCILS_BRANCH", default_value="master"),
+        Filter("GPU"),
+    ]
+
+    fields = [PanelInfos(runtime_key, Units.seconds),
+              PanelInfos(dp_key, Units.mflop_sec),
+              PanelInfos(p_max_key, Units.mflop_sec),
+              PanelInfos(f'"{dp_key}"/"{p_max_key}"', Units.percent),
+              PanelInfos(operational_intensity_key, Units.flop_per_byte),
+              PanelInfos(memory_bandwidth_key, Units.mbytes_per_second),
+              PanelInfos(memory_write_bandwidth_key, Units.mbytes_per_second),
+              PanelInfos(memory_read_bandwidth_key, Units.mbytes_per_second),
+              PanelInfos(memory_data_key, Units.gigabyte),
+              PanelInfos(memory_write_data_key, Units.gigabyte),
+              PanelInfos(memory_read_data_key, Units.gigabyte),
+              ]
+
+    filter_vars = [get_dashboard_variable(filter, "", data_source) for filter in filters]
+    benchmark = get_measurement_filter("benchmark", data_source, filter_pattern="_gpu$")
+
+    row_repeat_var = [fv for fv in filter_vars if fv.name == row_repeat][0]
+
+    where = join_variable_and([f.name for f in filters])
+    annotations = get_commit_annotation(data_source, "red", "commits", "vadd_gpu", commit_key="pystencils-commit")
+    group_by = [f.name for f in filters]
+    group_by.append("array_shape")
+
+    panels = [
+        get_time_series_panel(
+            field,
+            data_source,
+            f"/^${benchmark.name}$/",
+            where=where,
+            group_by=group_by,
+        )
+        for field in fields]
+
+    row = pack_in_row(
+        title=f"{row_repeat}: ${row_repeat_var.name}",
+        panels=[*panels],
+        repeat=Repeat('v', row_repeat_var.name),
+    )
+    return build_dashboard(options,
+                           rows=[row],
+                           templating=[*filter_vars, benchmark],
+                           annotations=annotations)
diff --git a/dashboards/deploy.py b/dashboards/deploy.py
index be0800978ff2c672ee1ec8d0bceb0ee4d8704ed9..ab55de7f8751dee5996db421321922c63591a738 100644
--- a/dashboards/deploy.py
+++ b/dashboards/deploy.py
@@ -4,7 +4,7 @@ import logging
 import dashboards.dashboard_list as boards
 from dashboards.upload import upload_dashboard
 from dashboards.dashboard_fe2ti import dashboard_fe2ti
-from dashboard_pystencils import dashboard_pystencils_cpu
+from dashboard_pystencils import dashboard_pystencils_cpu, dashboard_pystencils_gpu
 from dashboard_walberla import dashboard_uniformgridcpu, dashboard_uniformgridgpu
 
 logger = logging.getLogger(__file__)
@@ -40,6 +40,7 @@ def main():
             upload_dashboard(dashboard_fe2ti(), folder=fe2ti_folder)
         elif "pystencils" in board_name:
             upload_dashboard(dashboard_pystencils_cpu(), folder=pystencils_foler)
+            upload_dashboard(dashboard_pystencils_gpu(), folder=pystencils_foler)
         elif "walberla" in board_name:
             upload_dashboard(dashboard_uniformgridcpu(), folder=walberla_folder)
             upload_dashboard(dashboard_uniformgridgpu(), folder=walberla_folder)
diff --git a/tests/test_dashboard_creation.py b/tests/test_dashboard_creation.py
index 9acea28ba9313b166a423fadf11aa74828fc73a2..51030173a31f99a9feffc9e6143acbef6a6d71ca 100644
--- a/tests/test_dashboard_creation.py
+++ b/tests/test_dashboard_creation.py
@@ -6,7 +6,7 @@ from dashboards.dashboard_base import (get_commit_annotation,
                                        get_influx_target)
 from dashboards.dashboard_list import dashboard_uniformGridGPU
 from dashboards.dashboard_fe2ti import dashboard_fe2ti
-from dashboards.dashboard_pystencils import dashboard_pystencils_cpu
+from dashboards.dashboard_pystencils import dashboard_pystencils_cpu, dashboard_pystencils_gpu
 from dashboards.dashboard_walberla import dashboard_uniformgridgpu, dashboard_uniformgridcpu
 from dashboards.influx_queries import Query, show_tag_values
 
@@ -90,6 +90,10 @@ def test_dashboard_fe2ti():
 
 def test_dashboard_pystencils_cpu():
     dashboard_pystencils_cpu()
+
+
+def test_dashboard_pystencils_gpu():
+    dashboard_pystencils_gpu()
 
 
 def test_dashboard_walberla():