diff --git a/pystencils_benchmark/benchmark.py b/pystencils_benchmark/benchmark.py index 3247800cad565f5775dbf8be73e0d3773d79ec21..0cc7b115ac60dc82d5699f908b7ef38cc9c017ad 100644 --- a/pystencils_benchmark/benchmark.py +++ b/pystencils_benchmark/benchmark.py @@ -6,7 +6,7 @@ from jinja2 import Environment, PackageLoader, StrictUndefined import numpy as np from pystencils.backends.cbackend import generate_c, get_headers -from pystencils.astnodes import KernelFunction +from pystencils.astnodes import KernelFunction, PragmaBlock from pystencils.enums import Backend from pystencils.typing import get_base_type from pystencils.sympyextensions import prod diff --git a/pystencils_benchmark/benchmark_gpu.py b/pystencils_benchmark/benchmark_gpu.py index 1c4e24c4a543f4afdb28d4c96ef967c407d50d77..befd83d734a1587920235f6c25fa3a8641877e12 100644 --- a/pystencils_benchmark/benchmark_gpu.py +++ b/pystencils_benchmark/benchmark_gpu.py @@ -20,7 +20,10 @@ _env = Environment(loader=PackageLoader('pystencils_benchmark'), undefined=Stric def generate_benchmark_gpu(kernel_asts: Union[KernelFunction, List[KernelFunction]], path: Path = None, *, - compiler: Compiler = Compiler.GCC) -> None: + compiler: Compiler = Compiler.GCC, + timing: bool = True, + cuda_block_size: tuple = (32, 1, 1) + ) -> None: if path is None: path = Path('.') else: @@ -46,7 +49,9 @@ def generate_benchmark_gpu(kernel_asts: Union[KernelFunction, List[KernelFunctio f.write(source) with open(src_path / 'main.cu', 'w+') as f: - f.write(kernel_main(kernel_asts)) + f.write(kernel_main(kernel_asts, + timing=timing, + cuda_block_size=cuda_block_size)) copy_static_files(path) compiler_toolchain(path, compiler) @@ -56,6 +61,7 @@ def compiler_toolchain(path: Path, compiler: Compiler) -> None: name = compiler.name jinja_context = { 'compiler': name, + 'likwid': False, } files = ['Makefile', f'{name}.mk'] @@ -86,7 +92,7 @@ def copy_static_files(path: Path) -> None: f.write(template) -def kernel_main(kernels_ast: List[KernelFunction], timing: bool = True, cuda_block_size: tuple = (32, 1, 1)): +def kernel_main(kernels_ast: List[KernelFunction], *, timing: bool = True, cuda_block_size: tuple): """ Return C code of a benchmark program for the given kernel. diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index 4ecd4d8e381a1b4a61416fdc2d2686510745f1b7..411eb8362f12a3027a4857ff4b8d5bce91bed683 100755 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -48,9 +48,9 @@ def test_generate_gpu(): a[0] @= b[0] + c[0] kernel_vadd = ps.create_kernel(**vadd) - temp_dir = Path('/home/markus/pystencils_benchmark_testfolder') + temp_dir = Path('/tmp/pystencils_benchmark_testfolder') generate_benchmark_gpu([kernel_vadd], temp_dir, compiler=compiler) - subprocess.run(['make', '-C', f'{temp_dir}'], check=True) - subprocess.run([f'{temp_dir}/benchmark-{compiler.name}', '10'], check=True) + # subprocess.run(['make', '-C', f'{temp_dir}'], check=True) + # subprocess.run([f'{temp_dir}/benchmark-{compiler.name}', '10'], check=True)