Fixed `_add_launch_bound` and added a small test for it

4b1f3f53 · Christoph Alt · 4ee400e9 · 4b1f3f53 · 4b1f3f53
Commit 4b1f3f53 authored 1 year ago by Christoph Alt
--- a/pystencils_benchmark/gpu/benchmark.py
+++ b/pystencils_benchmark/gpu/benchmark.py
@@ -18,9 +18,13 @@ from pystencils_benchmark.enums import Compiler


 def _add_launch_bound(code: str, launch_bounds: tuple) -> str:
-    lb_str = f"__launch_bounds__({','.join(str(lb) for lb in launch_bounds)})"
-    splitted = code.split("void")
-    return splitted[0] + lb_str + "".join(splitted[1:])
+    """Insert ``__launch_bounds__(<launch_bounds, comma-separated>) `` after the first ``void ``.
+
+    Returns *code* unchanged if it contains no ``void `` token; later occurrences are kept as-is.
+    """
+    lb_str = f"__launch_bounds__({', '.join(str(lb) for lb in launch_bounds)}) "
+    # count=1 so that only the first ``void `` is annotated.
+    return code.replace("void ", "void " + lb_str, 1)


 def generate_benchmark(kernel_asts: Union[KernelFunction, List[KernelFunction]],

--- a/tests/test_launch_bounds.py
+++ b/tests/test_launch_bounds.py
+import numpy as np
+import pystencils as ps
+from pystencils_benchmark.gpu.benchmark import kernel_header, _add_launch_bound, kernel_source
+
+
+def test_launch_bounds():
+    """Check that ``_add_launch_bound`` annotates the ``kernel_header`` and ``kernel_source`` output."""
+    a, b, c = ps.fields(a=np.ones(4000000), b=np.ones(4000000), c=np.ones(4000000))
+
+    @ps.kernel_config(ps.CreateKernelConfig(target=ps.Target.GPU))
+    def vadd():
+        a[0] @= b[0] + c[0]
+
+    kernel_vadd = ps.create_kernel(**vadd)
+    bounds = (256, 2)
+    expected = "void __launch_bounds__(256, 2)"
+    # Both generated texts must carry the qualifier.
+    for render in (kernel_header, kernel_source):
+        annotated = _add_launch_bound(render(kernel_vadd), bounds)
+        assert expected in annotated