Commit 879ee872 authored by Christoph Alt

removed the unused `cuda_block_size` from the `gpu.generate_benchmark` function

For now, the only way to set the cuda_block_size is to pass it in the
`ps.KernelConfig` to the generated kernel.
parent d38a9324
Merge request !1: Add CUDA support
Pipeline #55674 skipped
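
For context, a minimal sketch of the workflow the commit message describes, assuming a recent pystencils API in which the block size is set through `ps.CreateKernelConfig` (the `ps.KernelConfig` mentioned above) via `gpu_indexing_params`. The `pystencils_benchmark.gpu` import, the output path, and the field/assignment names are illustrative assumptions, not part of this commit:

    from pathlib import Path

    import pystencils as ps
    from pystencils_benchmark import gpu  # assumed module layout

    # Placeholder fields and update rule for whatever kernel is benchmarked.
    src, dst = ps.fields("src, dst: double[2D]")
    update = [ps.Assignment(dst.center, 2 * src.center)]

    # The CUDA block size is now fixed when the kernel itself is created,
    # via the kernel config, instead of being passed to generate_benchmark.
    config = ps.CreateKernelConfig(
        target=ps.Target.GPU,
        gpu_indexing_params={'block_size': (128, 1, 1)},  # replaces the old cuda_block_size argument
    )
    kernel_ast = ps.create_kernel(update, config=config)

    # generate_benchmark no longer accepts cuda_block_size; the exact
    # positional arguments (e.g. the output path) are assumed here.
    gpu.generate_benchmark(kernel_ast, Path('gpu_benchmark'), timing=True)
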
@@ -32,7 +32,6 @@ def generate_benchmark(kernel_asts: Union[KernelFunction, List[KernelFunction]],
                        *,
                        compiler: Compiler = Compiler.NVCC,
                        timing: bool = True,
-                       cuda_block_size: tuple = (32, 1, 1),
                        launch_bounds: tuple = None,
                        ) -> None:
@@ -58,22 +57,19 @@ def generate_benchmark(kernel_asts: Union[KernelFunction, List[KernelFunction]],
             f.write(source)
     with open(src_path / 'main.cu', 'w+') as f:
-        f.write(kernel_main(kernel_asts,
-                            timing=timing,
-                            cuda_block_size=cuda_block_size))
+        f.write(kernel_main(kernel_asts, timing=timing))
     copy_static_files(path, source_file_suffix='.cu')
     compiler_toolchain(path, compiler, likwid=False)
-def kernel_main(kernels_ast: List[KernelFunction], *, timing: bool = True, cuda_block_size: tuple):
+def kernel_main(kernels_ast: List[KernelFunction], *, timing: bool = True):
     """
     Return C code of a benchmark program for the given kernel.
     Args:
         kernels_ast: A list of the pystencils AST object as returned by create_kernel for benchmarking
         timing: add timing output to the code, prints time per iteration to stdout
-        cuda_block_size: defines the cuda block grid
     Returns:
         C code as string
     """