Skip to content
Snippets Groups Projects
Commit 879ee872 authored by Christoph Alt
Browse files

removed the unused `cuda_block_size` for the `gpu.generate_benchmark`

function. For now, the only way to set the `cuda_block_size` is to
pass it in the `ps.KernelConfig` for the generated kernel.
parent d38a9324
No related branches found
No related tags found
1 merge request: !1 "Add CUDA support"
Pipeline #55674 skipped
...@@ -32,7 +32,6 @@ def generate_benchmark(kernel_asts: Union[KernelFunction, List[KernelFunction]], ...@@ -32,7 +32,6 @@ def generate_benchmark(kernel_asts: Union[KernelFunction, List[KernelFunction]],
*, *,
compiler: Compiler = Compiler.NVCC, compiler: Compiler = Compiler.NVCC,
timing: bool = True, timing: bool = True,
cuda_block_size: tuple = (32, 1, 1),
launch_bounds: tuple = None, launch_bounds: tuple = None,
) -> None: ) -> None:
...@@ -58,22 +57,19 @@ def generate_benchmark(kernel_asts: Union[KernelFunction, List[KernelFunction]], ...@@ -58,22 +57,19 @@ def generate_benchmark(kernel_asts: Union[KernelFunction, List[KernelFunction]],
f.write(source) f.write(source)
with open(src_path / 'main.cu', 'w+') as f: with open(src_path / 'main.cu', 'w+') as f:
f.write(kernel_main(kernel_asts, f.write(kernel_main(kernel_asts, timing=timing))
timing=timing,
cuda_block_size=cuda_block_size))
copy_static_files(path, source_file_suffix='.cu') copy_static_files(path, source_file_suffix='.cu')
compiler_toolchain(path, compiler, likwid=False) compiler_toolchain(path, compiler, likwid=False)
def kernel_main(kernels_ast: List[KernelFunction], *, timing: bool = True, cuda_block_size: tuple): def kernel_main(kernels_ast: List[KernelFunction], *, timing: bool = True):
""" """
Return C code of a benchmark program for the given kernel. Return C code of a benchmark program for the given kernel.
Args: Args:
kernels_ast: A list of the pystencils AST object as returned by create_kernel for benchmarking kernels_ast: A list of the pystencils AST object as returned by create_kernel for benchmarking
timing: add timing output to the code, prints time per iteration to stdout timing: add timing output to the code, prints time per iteration to stdout
cuda_block_size: defines the cuda block grid
Returns: Returns:
C code as string C code as string
""" """
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment