removed the unused `cuda_block_size` for the `gpu.generate_benchmark`

function. For now, the only way to set the CUDA block size is to pass it in the `ps.KernelConfig` to the generated kernel.

removed the unused `cuda_block_size` for the `gpu.generate_benchmark`
879ee872 · Christoph Alt · d38a9324 · 879ee872
Commit 879ee872 authored 1 year ago by Christoph Alt
--- a/pystencils_benchmark/gpu/benchmark.py
+++ b/pystencils_benchmark/gpu/benchmark.py
@@ -32,7 +32,6 @@ def generate_benchmark(kernel_asts: Union[KernelFunction, List[KernelFunction]],
                       *,
                       compiler: Compiler = Compiler.NVCC,
                       timing: bool = True,
-                       cuda_block_size: tuple = (32, 1, 1),
                       launch_bounds: tuple = None,
                       ) -> None:
@@ -58,22 +57,19 @@ def generate_benchmark(kernel_asts: Union[KernelFunction, List[KernelFunction]],
            f.write(source)
    with open(src_path / 'main.cu', 'w+') as f:
-        f.write(kernel_main(kernel_asts,
+        f.write(kernel_main(kernel_asts, timing=timing))
-                            timing=timing,
-                            cuda_block_size=cuda_block_size))
    copy_static_files(path, source_file_suffix='.cu')
    compiler_toolchain(path, compiler, likwid=False)
-def kernel_main(kernels_ast: List[KernelFunction], *, timing: bool = True, cuda_block_size: tuple):
+def kernel_main(kernels_ast: List[KernelFunction], *, timing: bool = True):
    """
    Return C code of a benchmark program for the given kernel.
    Args:
        kernels_ast: A list of the pystencils AST object as returned by create_kernel for benchmarking
        timing: add timing output to the code, prints time per iteration to stdout
-        cuda_block_size: defines the cuda block grid
    Returns:
        C code as string
    """