Skip to content
Snippets Groups Projects
Commit 4b1f3f53 authored by Christoph Alt
Browse files

Fixed `_add_launch_bound` and added some small tests

parent 4ee400e9
1 merge request: !1 "Add CUDA support"
Pipeline #55133 skipped with stage
......@@ -18,9 +18,13 @@ from pystencils_benchmark.enums import Compiler
def _add_launch_bound(code: str, launch_bounds: tuple) -> str:
lb_str = f"__launch_bounds__({','.join(str(lb) for lb in launch_bounds)})"
splitted = code.split("void")
return splitted[0] + lb_str + "".join(splitted[1:])
lb_str = f"__launch_bounds__({', '.join(str(lb) for lb in launch_bounds)}) "
splitted = code.split("void ")
prefix = splitted[0]
if code.startswith("void "):
# just in case that there is nothing before the first void
prefix = ""
return prefix + "void " + lb_str + "void ".join(splitted[1:])
def generate_benchmark(kernel_asts: Union[KernelFunction, List[KernelFunction]],
......
import numpy as np
import pystencils as ps
from pystencils_benchmark.gpu.benchmark import kernel_header, _add_launch_bound, kernel_source
def test_launch_bounds():
    """Check that ``_add_launch_bound`` annotates both header and source.

    A kernel targeting ``ps.Target.GPU`` that assigns ``b[0] + c[0]`` to
    ``a[0]`` is created, then its rendered header and its rendered source are
    each passed through ``_add_launch_bound`` and checked for the expected
    ``void __launch_bounds__(256, 2)`` text.
    """
    a, b, c = ps.fields(a=np.ones(4000000), b=np.ones(4000000), c=np.ones(4000000))

    @ps.kernel_config(ps.CreateKernelConfig(target=ps.Target.GPU))
    def vadd():
        a[0] @= b[0] + c[0]

    kernel_vadd = ps.create_kernel(**vadd)
    bounds = (256, 2)
    expected = "void __launch_bounds__(256, 2)"

    # Header first, then source -- the same order as the individual checks.
    for render in (kernel_header, kernel_source):
        annotated = _add_launch_bound(render(kernel_vadd), bounds)
        assert expected in annotated
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment