Commit 39b4029c authored by Christoph Alt

Exposing the cuda block size option to the generate_benchmark function

parent ac4b31c1
Merge request !1: Add CUDA support
@@ -6,7 +6,7 @@ from jinja2 import Environment, PackageLoader, StrictUndefined
 import numpy as np
 from pystencils.backends.cbackend import generate_c, get_headers
-from pystencils.astnodes import KernelFunction
+from pystencils.astnodes import KernelFunction, PragmaBlock
 from pystencils.enums import Backend
 from pystencils.typing import get_base_type
 from pystencils.sympyextensions import prod
@@ -20,7 +20,10 @@ _env = Environment(loader=PackageLoader('pystencils_benchmark'), undefined=StrictUndefined)
 def generate_benchmark_gpu(kernel_asts: Union[KernelFunction, List[KernelFunction]],
                            path: Path = None,
                            *,
-                           compiler: Compiler = Compiler.GCC) -> None:
+                           compiler: Compiler = Compiler.GCC,
+                           timing: bool = True,
+                           cuda_block_size: tuple = (32, 1, 1)
+                           ) -> None:
     if path is None:
         path = Path('.')
     else:
@@ -46,7 +49,9 @@ def generate_benchmark_gpu(kernel_asts: Union[KernelFunction, List[KernelFunction]],
         f.write(source)
     with open(src_path / 'main.cu', 'w+') as f:
-        f.write(kernel_main(kernel_asts))
+        f.write(kernel_main(kernel_asts,
+                            timing=timing,
+                            cuda_block_size=cuda_block_size))
     copy_static_files(path)
     compiler_toolchain(path, compiler)
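Taken together, these hunks let callers of generate_benchmark_gpu choose the timing instrumentation and the CUDA launch block size instead of relying on the values previously hard-coded in kernel_main. A minimal usage sketch, assuming a pystencils vector-add kernel like the one in the test below; the import path and the Compiler.NVCC member are assumptions, not part of this commit:

from pathlib import Path

import pystencils as ps
from pystencils_benchmark import Compiler, generate_benchmark_gpu  # import path assumed

# Assumed setup: a simple 1D vector-add kernel targeting the GPU,
# mirroring the kernel used in test_generate_gpu().
a, b, c = ps.fields('a, b, c: double[1D]')
config = ps.CreateKernelConfig(target=ps.Target.GPU)

@ps.kernel_config(config)
def vadd():
    a[0] @= b[0] + c[0]

kernel_vadd = ps.create_kernel(**vadd)

# The keyword-only options exposed by this commit: keep the timing code
# in the generated main.cu and launch with a 128x1x1 thread block.
generate_benchmark_gpu([kernel_vadd], Path('/tmp/vadd_benchmark'),
                       compiler=Compiler.NVCC,  # assumed member; Compiler.GCC is the signature default
                       timing=True,
                       cuda_block_size=(128, 1, 1))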
@@ -56,6 +61,7 @@ def compiler_toolchain(path: Path, compiler: Compiler) -> None:
     name = compiler.name
     jinja_context = {
         'compiler': name,
+        'likwid': False,
     }
     files = ['Makefile', f'{name}.mk']
@@ -86,7 +92,7 @@ def copy_static_files(path: Path) -> None:
         f.write(template)
 
 
-def kernel_main(kernels_ast: List[KernelFunction], timing: bool = True, cuda_block_size: tuple = (32, 1, 1)):
+def kernel_main(kernels_ast: List[KernelFunction], *, timing: bool = True, cuda_block_size: tuple):
     """
     Return C code of a benchmark program for the given kernel.
@@ -48,9 +48,9 @@ def test_generate_gpu():
         a[0] @= b[0] + c[0]
     kernel_vadd = ps.create_kernel(**vadd)
-    temp_dir = Path('/home/markus/pystencils_benchmark_testfolder')
+    temp_dir = Path('/tmp/pystencils_benchmark_testfolder')
     generate_benchmark_gpu([kernel_vadd], temp_dir, compiler=compiler)
-    subprocess.run(['make', '-C', f'{temp_dir}'], check=True)
-    subprocess.run([f'{temp_dir}/benchmark-{compiler.name}', '10'], check=True)
+    # subprocess.run(['make', '-C', f'{temp_dir}'], check=True)
+    # subprocess.run([f'{temp_dir}/benchmark-{compiler.name}', '10'], check=True)
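For a manual build and run of the generated benchmark, the steps the test now leaves commented out can be reused as-is. A sketch that mirrors them; the pystencils_benchmark import path is assumed, and the benchmark-<compiler name> binary name follows the test's pattern:

import subprocess
from pathlib import Path

from pystencils_benchmark import Compiler  # import path assumed

compiler = Compiler.GCC  # use whichever member the benchmark was generated with
temp_dir = Path('/tmp/pystencils_benchmark_testfolder')

# Build the generated sources via the emitted Makefile, then run the binary;
# the trailing '10' argument follows the commented-out test invocation.
subprocess.run(['make', '-C', str(temp_dir)], check=True)
subprocess.run([str(temp_dir / f'benchmark-{compiler.name}'), '10'], check=True)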