Skip to content
Snippets Groups Projects
Commit 39b4029c authored by Christoph Alt
Browse files

Exposing the CUDA block size option to the generate_benchmark_gpu function

parent ac4b31c1
No related branches found
No related tags found
1 merge request: !1 "Add CUDA support"
......@@ -6,7 +6,7 @@ from jinja2 import Environment, PackageLoader, StrictUndefined
import numpy as np
from pystencils.backends.cbackend import generate_c, get_headers
from pystencils.astnodes import KernelFunction
from pystencils.astnodes import KernelFunction, PragmaBlock
from pystencils.enums import Backend
from pystencils.typing import get_base_type
from pystencils.sympyextensions import prod
......
......@@ -20,7 +20,10 @@ _env = Environment(loader=PackageLoader('pystencils_benchmark'), undefined=Stric
def generate_benchmark_gpu(kernel_asts: Union[KernelFunction, List[KernelFunction]],
path: Path = None,
*,
compiler: Compiler = Compiler.GCC) -> None:
compiler: Compiler = Compiler.GCC,
timing: bool = True,
cuda_block_size: tuple = (32, 1, 1)
) -> None:
if path is None:
path = Path('.')
else:
......@@ -46,7 +49,9 @@ def generate_benchmark_gpu(kernel_asts: Union[KernelFunction, List[KernelFunctio
f.write(source)
with open(src_path / 'main.cu', 'w+') as f:
f.write(kernel_main(kernel_asts))
f.write(kernel_main(kernel_asts,
timing=timing,
cuda_block_size=cuda_block_size))
copy_static_files(path)
compiler_toolchain(path, compiler)
......@@ -56,6 +61,7 @@ def compiler_toolchain(path: Path, compiler: Compiler) -> None:
name = compiler.name
jinja_context = {
'compiler': name,
'likwid': False,
}
files = ['Makefile', f'{name}.mk']
......@@ -86,7 +92,7 @@ def copy_static_files(path: Path) -> None:
f.write(template)
def kernel_main(kernels_ast: List[KernelFunction], timing: bool = True, cuda_block_size: tuple = (32, 1, 1)):
def kernel_main(kernels_ast: List[KernelFunction], *, timing: bool = True, cuda_block_size: tuple):
"""
Return C code of a benchmark program for the given kernel.
......
......@@ -48,9 +48,9 @@ def test_generate_gpu():
a[0] @= b[0] + c[0]
kernel_vadd = ps.create_kernel(**vadd)
temp_dir = Path('/home/markus/pystencils_benchmark_testfolder')
temp_dir = Path('/tmp/pystencils_benchmark_testfolder')
generate_benchmark_gpu([kernel_vadd], temp_dir, compiler=compiler)
subprocess.run(['make', '-C', f'{temp_dir}'], check=True)
subprocess.run([f'{temp_dir}/benchmark-{compiler.name}', '10'], check=True)
# subprocess.run(['make', '-C', f'{temp_dir}'], check=True)
# subprocess.run([f'{temp_dir}/benchmark-{compiler.name}', '10'], check=True)
0% Loading
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment