From c14b948595dedb4653a934a7f0b554846627c872 Mon Sep 17 00:00:00 2001
From: Michael Zikeli <michael.zikeli@fau.de>
Date: Mon, 10 Feb 2025 11:18:34 +0100
Subject: [PATCH] Refactor benchmark scenario creation to support multiple
 inner_outer_split configurations for weak scaling, strong scaling, and
 single-node/single-GPU benchmarks.

---
 .../simulation_setup/benchmark_configs_RDM.py | 45 ++++++-----
 .../simulation_setup/benchmark_configs_RDM.py | 78 ++++++++++---------
 2 files changed, 66 insertions(+), 57 deletions(-)

diff --git a/apps/benchmarks/UniformGridCPU/simulation_setup/benchmark_configs_RDM.py b/apps/benchmarks/UniformGridCPU/simulation_setup/benchmark_configs_RDM.py
index 071d1b4e0..251f5a28f 100644
--- a/apps/benchmarks/UniformGridCPU/simulation_setup/benchmark_configs_RDM.py
+++ b/apps/benchmarks/UniformGridCPU/simulation_setup/benchmark_configs_RDM.py
@@ -170,13 +170,14 @@ def weak_scaling_benchmark():
     scenarios = wlb.ScenarioManager()
 
     for t in ["simpleOverlap"]:
-        scenarios.add(Scenario(benchmark_name="weakScalingUniformGrid",
-                               time_step_strategy=t,
-                               inner_outer_split=(1, 1, 1),
-                               cells_per_block=(WeakX, WeakY, WeakZ),
-                               boundary_setup=True,
-                               outer_iterations=1,
-                               db_file_name="weakScalingUniformGridOneBlock.sqlite3"))
+        for split in [(1, 1, 1), (16, 1, 1)]:
+            scenarios.add(Scenario(benchmark_name="weakScalingUniformGrid",
+                                time_step_strategy=t,
+                                inner_outer_split=split,
+                                cells_per_block=(WeakX, WeakY, WeakZ),
+                                boundary_setup=True,
+                                outer_iterations=1,
+                                db_file_name="weakScalingUniformGridOneBlock.sqlite3"))
 
 
 def strong_scaling_benchmark():
@@ -190,13 +191,15 @@ def strong_scaling_benchmark():
     cells_per_block = tuple([d // b for d, b in zip(domain_size, reversed(blocks))])
 
     for t in ["simpleOverlap"]:
-        scenarios.add(Scenario(benchmark_name="strongScalingUniformGridOneBlock",
-                               cells_per_block=cells_per_block,
-                               time_step_strategy=t,
-                               outer_iterations=1,
-                               timesteps=num_time_steps(cells_per_block),
-                               boundary_setup=True,
-                               db_file_name="strongScalingUniformGridOneBlock.sqlite3"))
+        for split in [(1, 1, 1), (16, 1, 1)]:
+            scenarios.add(Scenario(benchmark_name="strongScalingUniformGridOneBlock",
+                                cells_per_block=cells_per_block,
+                                time_step_strategy=t,
+                                inner_outer_split=split,
+                                outer_iterations=1,
+                                timesteps=num_time_steps(cells_per_block),
+                                boundary_setup=True,
+                                db_file_name="strongScalingUniformGridOneBlock.sqlite3"))
 
 
 def single_node_benchmark():
@@ -205,12 +208,14 @@ def single_node_benchmark():
     wlb.log_info_on_root("")
 
     scenarios = wlb.ScenarioManager()
-    scenario = Scenario(benchmark_name="singleNodeUniformGridOneBlock",
-                        cells_per_block=(128, 128, 128),
-                        time_step_strategy='kernelOnly',
-                        outer_iterations=1,
-                        timesteps=10)
-    scenarios.add(scenario)
+    for split in [(1, 1, 1), (16, 1, 1)]:
+        scenario = Scenario(benchmark_name="singleNodeUniformGridOneBlock",
+                            cells_per_block=(128, 128, 128),
+                            time_step_strategy='kernelOnly',
+                            inner_outer_split=split,
+                            outer_iterations=1,
+                            timesteps=10)
+        scenarios.add(scenario)
 
 
 def validation_run():
diff --git a/apps/benchmarks/UniformGridGPU/simulation_setup/benchmark_configs_RDM.py b/apps/benchmarks/UniformGridGPU/simulation_setup/benchmark_configs_RDM.py
index 74269c0bc..bb9da2611 100755
--- a/apps/benchmarks/UniformGridGPU/simulation_setup/benchmark_configs_RDM.py
+++ b/apps/benchmarks/UniformGridGPU/simulation_setup/benchmark_configs_RDM.py
@@ -213,15 +213,16 @@ def weak_scaling_overlap(cuda_enabled_mpi=False):
 
     # overlap
     for t in ["simpleOverlap"]:
-        scenarios.add(Scenario(benchmark_name="weakScalingUniformGrid",
-                               cells_per_block=(WeakX, WeakY, WeakZ),
-                               cuda_blocks=(128, 1, 1),
-                               time_step_strategy=t,
-                               inner_outer_split=(8, 8, 8),
-                               cuda_enabled_mpi=cuda_enabled_mpi,
-                               outer_iterations=1,
-                               boundary_setup=True,
-                               db_file_name="weakScalingUniformGrid.sqlite3"))
+        for split in [(1, 1, 1), (8, 8, 8), (16, 1, 1)]:
+            scenarios.add(Scenario(benchmark_name="weakScalingUniformGrid",
+                                cells_per_block=(WeakX, WeakY, WeakZ),
+                                cuda_blocks=(128, 1, 1),
+                                time_step_strategy=t,
+                                inner_outer_split=split,
+                                cuda_enabled_mpi=cuda_enabled_mpi,
+                                outer_iterations=1,
+                                boundary_setup=True,
+                                db_file_name="weakScalingUniformGrid.sqlite3"))
 
 
 def strong_scaling_overlap(cuda_enabled_mpi=False):
@@ -236,17 +237,18 @@ def strong_scaling_overlap(cuda_enabled_mpi=False):
 
     # overlap
     for t in ["simpleOverlap"]:
-        scenarios.add(Scenario(benchmark_name="strongScalingUniformGridOneBlock",
-                               cells_per_block=cells_per_block,
-                               cuda_blocks=(128, 1, 1),
-                               time_step_strategy=t,
-                               inner_outer_split=(1, 1, 1),
-                               cuda_enabled_mpi=cuda_enabled_mpi,
-                               outer_iterations=1,
-                               timesteps=num_time_steps(cells_per_block),
-                               blocks=blocks,
-                               boundary_setup=True,
-                               db_file_name="strongScalingUniformGridOneBlock.sqlite3"))
+        for split in [(1, 1, 1), (8, 8, 8), (16, 1, 1)]:
+            scenarios.add(Scenario(benchmark_name="strongScalingUniformGridOneBlock",
+                                cells_per_block=cells_per_block,
+                                cuda_blocks=(128, 1, 1),
+                                time_step_strategy=t,
+                                inner_outer_split=split,
+                                cuda_enabled_mpi=cuda_enabled_mpi,
+                                outer_iterations=1,
+                                timesteps=num_time_steps(cells_per_block),
+                                blocks=blocks,
+                                boundary_setup=True,
+                                db_file_name="strongScalingUniformGridOneBlock.sqlite3"))
 
 
 def single_gpu_benchmark():
@@ -267,23 +269,25 @@ def single_gpu_benchmark():
     cuda_blocks = [(128, 1, 1), ]
     for block_size in block_sizes:
         for cuda_block_size in cuda_blocks:
-            # cuda_block_size = (256, 1, 1) and block_size = (64, 64, 64) would be cut to cuda_block_size = (64, 1, 1)
-            if cuda_block_size > block_size:
-                continue
-            if not cuda_block_size_ok(cuda_block_size):
-                wlb.log_info_on_root(f"Cuda block size {cuda_block_size} would exceed register limit. Skipping.")
-                continue
-            if not domain_block_size_ok(block_size, gpu_mem):
-                wlb.log_info_on_root(f"Block size {block_size} would exceed GPU memory. Skipping.")
-                continue
-            scenario = Scenario(benchmark_name="singleNodeUniformGridOneBlock",
-                                cells_per_block=block_size,
-                                cuda_blocks=cuda_block_size,
-                                time_step_strategy='kernelOnly',
-                                timesteps=num_time_steps(block_size, 2000),
-                                outer_iterations=1,
-                                additional_info=additional_info)
-            scenarios.add(scenario)
+            for split in [(1, 1, 1), (8, 8, 8), (16, 1, 1)]:
+                # cuda_block_size = (256, 1, 1) and block_size = (64, 64, 64) would be cut to cuda_block_size = (64, 1, 1)
+                if cuda_block_size > block_size:
+                    continue
+                if not cuda_block_size_ok(cuda_block_size):
+                    wlb.log_info_on_root(f"Cuda block size {cuda_block_size} would exceed register limit. Skipping.")
+                    continue
+                if not domain_block_size_ok(block_size, gpu_mem):
+                    wlb.log_info_on_root(f"Block size {block_size} would exceed GPU memory. Skipping.")
+                    continue
+                scenario = Scenario(benchmark_name="singleNodeUniformGridOneBlock",
+                                    cells_per_block=block_size,
+                                    cuda_blocks=cuda_block_size,
+                                    time_step_strategy='kernelOnly',
+                                    inner_outer_split=split,
+                                    timesteps=num_time_steps(block_size, 2000),
+                                    outer_iterations=1,
+                                    additional_info=additional_info)
+                scenarios.add(scenario)
 
 
 def validation_run():
-- 
GitLab