From 9140da63f4446be8b1c9c5f58617654f5fffb485 Mon Sep 17 00:00:00 2001
From: Christoph Alt <christoph.alt@fau.de>
Date: Tue, 8 Aug 2023 16:22:11 +0200
Subject: [PATCH] Added a parameter to insert a launch bounds to the kernel

---
 pystencils_benchmark/gpu/benchmark.py | 21 ++++++++++++++++++++-
 tests/test_benchmark.py               |  8 ++++++--
 2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/pystencils_benchmark/gpu/benchmark.py b/pystencils_benchmark/gpu/benchmark.py
index d653d18..5a4852c 100644
--- a/pystencils_benchmark/gpu/benchmark.py
+++ b/pystencils_benchmark/gpu/benchmark.py
@@ -17,12 +17,27 @@ from pystencils_benchmark.common import (_env,
 from pystencils_benchmark.enums import Compiler
 
 
+def _add_launch_bound(code: str, launch_bounds: tuple) -> str:
+    """Insert a ``__launch_bounds__`` qualifier after the first ``void`` in `code`.
+
+    Only the first ``void`` is used as the insertion point (presumably the
+    kernel's return type); it and every later ``void`` are kept as they are.
+    `code` is returned unchanged if it contains no ``void``.
+    """
+    lb_str = f"__launch_bounds__({','.join(str(lb) for lb in launch_bounds)})"
+    prefix, void, rest = code.partition("void")
+    if not void:
+        return code
+    return f"{prefix}{void} {lb_str}{rest}"
+
+
 def generate_benchmark(kernel_asts: Union[KernelFunction, List[KernelFunction]],
                        path: Path = None,
                        *,
                        compiler: Compiler = Compiler.NVCC,
                        timing: bool = True,
-                       cuda_block_size: tuple = (32, 1, 1)
+                       cuda_block_size: tuple = (32, 1, 1),
+                       launch_bounds: tuple = None,
                        ) -> None:
 
     src_path, include_path = setup_directories(path)
@@ -34,10 +49,14 @@ def generate_benchmark(kernel_asts: Union[KernelFunction, List[KernelFunction]],
         kernel_name = kernel_ast.function_name
 
         header = kernel_header(kernel_ast)
+        if launch_bounds:
+            header = _add_launch_bound(header, launch_bounds)
         with open(include_path / f'{kernel_name}.h', 'w+') as f:
             f.write(header)
 
         source = kernel_source(kernel_ast)
+        if launch_bounds:
+            source = _add_launch_bound(source, launch_bounds)
         # TODO CUDA specific suffix
         with open(src_path / f'{kernel_name}.cu', 'w+') as f:
             f.write(source)
diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py
index 0c42c79..929d4f8 100755
--- a/tests/test_benchmark.py
+++ b/tests/test_benchmark.py
@@ -41,7 +41,11 @@ def test_generate(compiler, config_kwarg):
         subprocess.run([f'{temp_dir}/benchmark-{compiler.name}', '10'], check=True)
 
 
-def test_generate_gpu():
+gpu_kwargs = ({}, {'launch_bounds': (256,)}, {'launch_bounds': (256, 2)})
+
+
+@pytest.mark.parametrize('kwargs', gpu_kwargs)
+def test_generate_gpu(kwargs):
     compiler = Compiler.NVCC
     a, b, c = ps.fields(a=np.ones(4000000), b=np.ones(4000000), c=np.ones(4000000))
 
@@ -52,6 +56,6 @@ def test_generate_gpu():
 
     with tempfile.TemporaryDirectory(dir=Path.cwd()) as temp_dir:
         temp_dir = Path(temp_dir)
-        pb.gpu.generate_benchmark(kernel_vadd, temp_dir, compiler=compiler)
+        pb.gpu.generate_benchmark(kernel_vadd, temp_dir, compiler=compiler, **kwargs)
         # subprocess.run(['make', '-C', f'{temp_dir}'], check=True)
         # subprocess.run([f'{temp_dir}/benchmark-{compiler.name}', '10'], check=True)
-- 
GitLab