From c14b948595dedb4653a934a7f0b554846627c872 Mon Sep 17 00:00:00 2001 From: Michael Zikeli <michael.zikeli@fau.de> Date: Mon, 10 Feb 2025 11:18:34 +0100 Subject: [PATCH] Refactor benchmark scenario creation to support multiple inner_outer_split configurations for weak and strong scaling benchmarks. --- .../simulation_setup/benchmark_configs_RDM.py | 45 ++++++----- .../simulation_setup/benchmark_configs_RDM.py | 78 ++++++++++--------- 2 files changed, 66 insertions(+), 57 deletions(-) diff --git a/apps/benchmarks/UniformGridCPU/simulation_setup/benchmark_configs_RDM.py b/apps/benchmarks/UniformGridCPU/simulation_setup/benchmark_configs_RDM.py index 071d1b4e0..251f5a28f 100644 --- a/apps/benchmarks/UniformGridCPU/simulation_setup/benchmark_configs_RDM.py +++ b/apps/benchmarks/UniformGridCPU/simulation_setup/benchmark_configs_RDM.py @@ -170,13 +170,14 @@ def weak_scaling_benchmark(): scenarios = wlb.ScenarioManager() for t in ["simpleOverlap"]: - scenarios.add(Scenario(benchmark_name="weakScalingUniformGrid", - time_step_strategy=t, - inner_outer_split=(1, 1, 1), - cells_per_block=(WeakX, WeakY, WeakZ), - boundary_setup=True, - outer_iterations=1, - db_file_name="weakScalingUniformGridOneBlock.sqlite3")) + for split in [(1, 1, 1), (16, 1, 1)]: + scenarios.add(Scenario(benchmark_name="weakScalingUniformGrid", + time_step_strategy=t, + inner_outer_split=split, + cells_per_block=(WeakX, WeakY, WeakZ), + boundary_setup=True, + outer_iterations=1, + db_file_name="weakScalingUniformGridOneBlock.sqlite3")) def strong_scaling_benchmark(): @@ -190,13 +191,15 @@ def strong_scaling_benchmark(): cells_per_block = tuple([d // b for d, b in zip(domain_size, reversed(blocks))]) for t in ["simpleOverlap"]: - scenarios.add(Scenario(benchmark_name="strongScalingUniformGridOneBlock", - cells_per_block=cells_per_block, - time_step_strategy=t, - outer_iterations=1, - timesteps=num_time_steps(cells_per_block), - boundary_setup=True, - db_file_name="strongScalingUniformGridOneBlock.sqlite3")) + for split in [(1, 1, 1), (16, 1, 1)]: + scenarios.add(Scenario(benchmark_name="strongScalingUniformGridOneBlock", + cells_per_block=cells_per_block, + time_step_strategy=t, + inner_outer_split=split, + outer_iterations=1, + timesteps=num_time_steps(cells_per_block), + boundary_setup=True, + db_file_name="strongScalingUniformGridOneBlock.sqlite3")) def single_node_benchmark(): @@ -205,12 +208,14 @@ def single_node_benchmark(): wlb.log_info_on_root("") scenarios = wlb.ScenarioManager() - scenario = Scenario(benchmark_name="singleNodeUniformGridOneBlock", - cells_per_block=(128, 128, 128), - time_step_strategy='kernelOnly', - outer_iterations=1, - timesteps=10) - scenarios.add(scenario) + for split in [(1, 1, 1), (16, 1, 1)]: + scenario = Scenario(benchmark_name="singleNodeUniformGridOneBlock", + cells_per_block=(128, 128, 128), + time_step_strategy='kernelOnly', + inner_outer_split=split, + outer_iterations=1, + timesteps=10) + scenarios.add(scenario) def validation_run(): diff --git a/apps/benchmarks/UniformGridGPU/simulation_setup/benchmark_configs_RDM.py b/apps/benchmarks/UniformGridGPU/simulation_setup/benchmark_configs_RDM.py index 74269c0bc..bb9da2611 100755 --- a/apps/benchmarks/UniformGridGPU/simulation_setup/benchmark_configs_RDM.py +++ b/apps/benchmarks/UniformGridGPU/simulation_setup/benchmark_configs_RDM.py @@ -213,15 +213,16 @@ def weak_scaling_overlap(cuda_enabled_mpi=False): # overlap for t in ["simpleOverlap"]: - scenarios.add(Scenario(benchmark_name="weakScalingUniformGrid", - cells_per_block=(WeakX, WeakY, WeakZ), - cuda_blocks=(128, 1, 1), - time_step_strategy=t, - inner_outer_split=(8, 8, 8), - cuda_enabled_mpi=cuda_enabled_mpi, - outer_iterations=1, - boundary_setup=True, - db_file_name="weakScalingUniformGrid.sqlite3")) + for split in [(1, 1, 1), (8, 8, 8), (16, 1, 1)]: + scenarios.add(Scenario(benchmark_name="weakScalingUniformGrid", + cells_per_block=(WeakX, WeakY, WeakZ), + cuda_blocks=(128, 1, 1), + time_step_strategy=t, + inner_outer_split=split, + cuda_enabled_mpi=cuda_enabled_mpi, + outer_iterations=1, + boundary_setup=True, + db_file_name="weakScalingUniformGrid.sqlite3")) def strong_scaling_overlap(cuda_enabled_mpi=False): @@ -236,17 +237,18 @@ def strong_scaling_overlap(cuda_enabled_mpi=False): # overlap for t in ["simpleOverlap"]: - scenarios.add(Scenario(benchmark_name="strongScalingUniformGridOneBlock", - cells_per_block=cells_per_block, - cuda_blocks=(128, 1, 1), - time_step_strategy=t, - inner_outer_split=(1, 1, 1), - cuda_enabled_mpi=cuda_enabled_mpi, - outer_iterations=1, - timesteps=num_time_steps(cells_per_block), - blocks=blocks, - boundary_setup=True, - db_file_name="strongScalingUniformGridOneBlock.sqlite3")) + for split in [(1, 1, 1), (8, 8, 8), (16, 1, 1)]: + scenarios.add(Scenario(benchmark_name="strongScalingUniformGridOneBlock", + cells_per_block=cells_per_block, + cuda_blocks=(128, 1, 1), + time_step_strategy=t, + inner_outer_split=split, + cuda_enabled_mpi=cuda_enabled_mpi, + outer_iterations=1, + timesteps=num_time_steps(cells_per_block), + blocks=blocks, + boundary_setup=True, + db_file_name="strongScalingUniformGridOneBlock.sqlite3")) def single_gpu_benchmark(): @@ -267,23 +269,25 @@ def single_gpu_benchmark(): cuda_blocks = [(128, 1, 1), ] for block_size in block_sizes: for cuda_block_size in cuda_blocks: - # cuda_block_size = (256, 1, 1) and block_size = (64, 64, 64) would be cut to cuda_block_size = (64, 1, 1) - if cuda_block_size > block_size: - continue - if not cuda_block_size_ok(cuda_block_size): - wlb.log_info_on_root(f"Cuda block size {cuda_block_size} would exceed register limit. Skipping.") - continue - if not domain_block_size_ok(block_size, gpu_mem): - wlb.log_info_on_root(f"Block size {block_size} would exceed GPU memory. Skipping.") - continue - scenario = Scenario(benchmark_name="singleNodeUniformGridOneBlock", - cells_per_block=block_size, - cuda_blocks=cuda_block_size, - time_step_strategy='kernelOnly', - timesteps=num_time_steps(block_size, 2000), - outer_iterations=1, - additional_info=additional_info) - scenarios.add(scenario) + for split in [(1, 1, 1), (8, 8, 8), (16, 1, 1)]: + # cuda_block_size = (256, 1, 1) and block_size = (64, 64, 64) would be cut to cuda_block_size = (64, 1, 1) + if cuda_block_size > block_size: + continue + if not cuda_block_size_ok(cuda_block_size): + wlb.log_info_on_root(f"Cuda block size {cuda_block_size} would exceed register limit. Skipping.") + continue + if not domain_block_size_ok(block_size, gpu_mem): + wlb.log_info_on_root(f"Block size {block_size} would exceed GPU memory. Skipping.") + continue + scenario = Scenario(benchmark_name="singleNodeUniformGridOneBlock", + cells_per_block=block_size, + cuda_blocks=cuda_block_size, + time_step_strategy='kernelOnly', + inner_outer_split=split, + timesteps=num_time_steps(block_size, 2000), + outer_iterations=1, + additional_info=additional_info) + scenarios.add(scenario) def validation_run(): -- GitLab