Skip to content
Snippets Groups Projects
Commit 9140da63 authored by Christoph Alt's avatar Christoph Alt
Browse files

Added a parameter to insert launch bounds into the kernel

parent 857f1848
No related branches found
No related tags found
1 merge request: !1 Add CUDA support
Pipeline #54975 skipped
...@@ -17,12 +17,19 @@ from pystencils_benchmark.common import (_env, ...@@ -17,12 +17,19 @@ from pystencils_benchmark.common import (_env,
from pystencils_benchmark.enums import Compiler from pystencils_benchmark.enums import Compiler
def _add_launch_bound(code: str, launch_bounds: tuple) -> str:
lb_str = f"__launch_bounds__({','.join(str(lb) for lb in launch_bounds)})"
splitted = code.split("void")
return splitted[0] + lb_str + "".join(splitted[1:])
def generate_benchmark(kernel_asts: Union[KernelFunction, List[KernelFunction]], def generate_benchmark(kernel_asts: Union[KernelFunction, List[KernelFunction]],
path: Path = None, path: Path = None,
*, *,
compiler: Compiler = Compiler.NVCC, compiler: Compiler = Compiler.NVCC,
timing: bool = True, timing: bool = True,
cuda_block_size: tuple = (32, 1, 1) cuda_block_size: tuple = (32, 1, 1),
launch_bounds: tuple = None,
) -> None: ) -> None:
src_path, include_path = setup_directories(path) src_path, include_path = setup_directories(path)
...@@ -34,10 +41,14 @@ def generate_benchmark(kernel_asts: Union[KernelFunction, List[KernelFunction]], ...@@ -34,10 +41,14 @@ def generate_benchmark(kernel_asts: Union[KernelFunction, List[KernelFunction]],
kernel_name = kernel_ast.function_name kernel_name = kernel_ast.function_name
header = kernel_header(kernel_ast) header = kernel_header(kernel_ast)
if launch_bounds:
header = _add_launch_bound(header, launch_bounds)
with open(include_path / f'{kernel_name}.h', 'w+') as f: with open(include_path / f'{kernel_name}.h', 'w+') as f:
f.write(header) f.write(header)
source = kernel_source(kernel_ast) source = kernel_source(kernel_ast)
if launch_bounds:
source = _add_launch_bound(source, launch_bounds)
# TODO CUDA specific suffix # TODO CUDA specific suffix
with open(src_path / f'{kernel_name}.cu', 'w+') as f: with open(src_path / f'{kernel_name}.cu', 'w+') as f:
f.write(source) f.write(source)
......
...@@ -41,7 +41,11 @@ def test_generate(compiler, config_kwarg): ...@@ -41,7 +41,11 @@ def test_generate(compiler, config_kwarg):
subprocess.run([f'{temp_dir}/benchmark-{compiler.name}', '10'], check=True) subprocess.run([f'{temp_dir}/benchmark-{compiler.name}', '10'], check=True)
def test_generate_gpu(): gpu_kwargs = ({}, {'launch_bounds': (256,)}, {'launch_bounds': (256, 2)})
@pytest.mark.parametrize('kwargs', gpu_kwargs)
def test_generate_gpu(kwargs):
compiler = Compiler.NVCC compiler = Compiler.NVCC
a, b, c = ps.fields(a=np.ones(4000000), b=np.ones(4000000), c=np.ones(4000000)) a, b, c = ps.fields(a=np.ones(4000000), b=np.ones(4000000), c=np.ones(4000000))
...@@ -52,6 +56,6 @@ def test_generate_gpu(): ...@@ -52,6 +56,6 @@ def test_generate_gpu():
with tempfile.TemporaryDirectory(dir=Path.cwd()) as temp_dir: with tempfile.TemporaryDirectory(dir=Path.cwd()) as temp_dir:
temp_dir = Path(temp_dir) temp_dir = Path(temp_dir)
pb.gpu.generate_benchmark(kernel_vadd, temp_dir, compiler=compiler) pb.gpu.generate_benchmark(kernel_vadd, temp_dir, compiler=compiler, **kwargs)
# subprocess.run(['make', '-C', f'{temp_dir}'], check=True) # subprocess.run(['make', '-C', f'{temp_dir}'], check=True)
# subprocess.run([f'{temp_dir}/benchmark-{compiler.name}', '10'], check=True) # subprocess.run([f'{temp_dir}/benchmark-{compiler.name}', '10'], check=True)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment