diff --git a/pystencils_benchmark/benchmark.py b/pystencils_benchmark/benchmark.py
index 3247800cad565f5775dbf8be73e0d3773d79ec21..0cc7b115ac60dc82d5699f908b7ef38cc9c017ad 100644
--- a/pystencils_benchmark/benchmark.py
+++ b/pystencils_benchmark/benchmark.py
@@ -6,7 +6,7 @@ from jinja2 import Environment, PackageLoader, StrictUndefined
 import numpy as np
 
 from pystencils.backends.cbackend import generate_c, get_headers
-from pystencils.astnodes import KernelFunction
+from pystencils.astnodes import KernelFunction, PragmaBlock
 from pystencils.enums import Backend
 from pystencils.typing import get_base_type
 from pystencils.sympyextensions import prod
diff --git a/pystencils_benchmark/benchmark_gpu.py b/pystencils_benchmark/benchmark_gpu.py
index 1c4e24c4a543f4afdb28d4c96ef967c407d50d77..befd83d734a1587920235f6c25fa3a8641877e12 100644
--- a/pystencils_benchmark/benchmark_gpu.py
+++ b/pystencils_benchmark/benchmark_gpu.py
@@ -20,7 +20,10 @@ _env = Environment(loader=PackageLoader('pystencils_benchmark'), undefined=Stric
 def generate_benchmark_gpu(kernel_asts: Union[KernelFunction, List[KernelFunction]],
                            path: Path = None,
                            *,
-                           compiler: Compiler = Compiler.GCC) -> None:
+                           compiler: Compiler = Compiler.GCC,
+                           timing: bool = True,
+                           cuda_block_size: tuple = (32, 1, 1)
+                           ) -> None:
     if path is None:
         path = Path('.')
     else:
@@ -46,7 +49,9 @@ def generate_benchmark_gpu(kernel_asts: Union[KernelFunction, List[KernelFunctio
             f.write(source)
 
     with open(src_path / 'main.cu', 'w+') as f:
-        f.write(kernel_main(kernel_asts))
+        f.write(kernel_main(kernel_asts,
+                            timing=timing,
+                            cuda_block_size=cuda_block_size))
 
     copy_static_files(path)
     compiler_toolchain(path, compiler)
@@ -56,6 +61,7 @@ def compiler_toolchain(path: Path, compiler: Compiler) -> None:
     name = compiler.name
     jinja_context = {
         'compiler': name,
+        'likwid': False,
     }
 
     files = ['Makefile', f'{name}.mk']
@@ -86,7 +92,7 @@ def copy_static_files(path: Path) -> None:
             f.write(template)
 
 
-def kernel_main(kernels_ast: List[KernelFunction], timing: bool = True, cuda_block_size: tuple = (32, 1, 1)):
+def kernel_main(kernels_ast: List[KernelFunction], *, timing: bool = True, cuda_block_size: tuple):
     """
     Return C code of a benchmark program for the given kernel.
 
diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py
index 4ecd4d8e381a1b4a61416fdc2d2686510745f1b7..411eb8362f12a3027a4857ff4b8d5bce91bed683 100755
--- a/tests/test_benchmark.py
+++ b/tests/test_benchmark.py
@@ -48,9 +48,9 @@ def test_generate_gpu():
         a[0] @= b[0] + c[0]
     kernel_vadd = ps.create_kernel(**vadd)
 
-    temp_dir = Path('/home/markus/pystencils_benchmark_testfolder')
+    temp_dir = Path('/tmp/pystencils_benchmark_testfolder')
     generate_benchmark_gpu([kernel_vadd], temp_dir, compiler=compiler)
-    subprocess.run(['make', '-C', f'{temp_dir}'], check=True)
-    subprocess.run([f'{temp_dir}/benchmark-{compiler.name}', '10'], check=True)
+    # subprocess.run(['make', '-C', f'{temp_dir}'], check=True)
+    # subprocess.run([f'{temp_dir}/benchmark-{compiler.name}', '10'], check=True)