diff --git a/pystencils_benchmark/gpu/benchmark.py b/pystencils_benchmark/gpu/benchmark.py
index 96fb58cada928da92aa55b50a344f694be6c78cc..1e9ce37cce4ff8679f6ef43d8a66e097d98dc2f7 100644
--- a/pystencils_benchmark/gpu/benchmark.py
+++ b/pystencils_benchmark/gpu/benchmark.py
@@ -32,7 +32,6 @@ def generate_benchmark(kernel_asts: Union[KernelFunction, List[KernelFunction]],
                        *,
                        compiler: Compiler = Compiler.NVCC,
                        timing: bool = True,
-                       cuda_block_size: tuple = (32, 1, 1),
                        launch_bounds: tuple = None,
                        ) -> None:
 
@@ -58,22 +57,19 @@ def generate_benchmark(kernel_asts: Union[KernelFunction, List[KernelFunction]],
             f.write(source)
 
     with open(src_path / 'main.cu', 'w+') as f:
-        f.write(kernel_main(kernel_asts,
-                            timing=timing,
-                            cuda_block_size=cuda_block_size))
+        f.write(kernel_main(kernel_asts, timing=timing))
 
     copy_static_files(path, source_file_suffix='.cu')
     compiler_toolchain(path, compiler, likwid=False)
 
 
-def kernel_main(kernels_ast: List[KernelFunction], *, timing: bool = True, cuda_block_size: tuple):
+def kernel_main(kernels_ast: List[KernelFunction], *, timing: bool = True):
     """
     Return C code of a benchmark program for the given kernel.
 
     Args:
         kernels_ast: A list of the pystencils AST object as returned by create_kernel for benchmarking
         timing: add timing output to the code, prints time per iteration to stdout
-        cuda_block_size: defines the cuda block grid
     Returns:
         C code as string
     """