diff --git a/pystencils_benchmark/gpu/benchmark.py b/pystencils_benchmark/gpu/benchmark.py index 96fb58cada928da92aa55b50a344f694be6c78cc..1e9ce37cce4ff8679f6ef43d8a66e097d98dc2f7 100644 --- a/pystencils_benchmark/gpu/benchmark.py +++ b/pystencils_benchmark/gpu/benchmark.py @@ -32,7 +32,6 @@ def generate_benchmark(kernel_asts: Union[KernelFunction, List[KernelFunction]], *, compiler: Compiler = Compiler.NVCC, timing: bool = True, - cuda_block_size: tuple = (32, 1, 1), launch_bounds: tuple = None, ) -> None: @@ -58,22 +57,19 @@ def generate_benchmark(kernel_asts: Union[KernelFunction, List[KernelFunction]], f.write(source) with open(src_path / 'main.cu', 'w+') as f: - f.write(kernel_main(kernel_asts, - timing=timing, - cuda_block_size=cuda_block_size)) + f.write(kernel_main(kernel_asts, timing=timing)) copy_static_files(path, source_file_suffix='.cu') compiler_toolchain(path, compiler, likwid=False) -def kernel_main(kernels_ast: List[KernelFunction], *, timing: bool = True, cuda_block_size: tuple): +def kernel_main(kernels_ast: List[KernelFunction], *, timing: bool = True): """ Return C code of a benchmark program for the given kernel. Args: kernels_ast: A list of the pystencils AST object as returned by create_kernel for benchmarking timing: add timing output to the code, prints time per iteration to stdout - cuda_block_size: defines the cuda block grid Returns: C code as string """