From 69a150edeeeec3b618ba1b70fef296db245dc5c4 Mon Sep 17 00:00:00 2001
From: Frederik Hennig <frederik.hennig@fau.de>
Date: Wed, 5 Mar 2025 16:43:52 +0100
Subject: [PATCH] first CUDA genscript test - not compiling just yet

---
 src/pystencilssfg/composer/basic_composer.py  | 22 ++++++++----------
 tests/generator_scripts/index.yaml            | 14 +++++++++++
 tests/generator_scripts/source/CudaKernels.py | 23 +++++++++++++++++++
 .../test_generator_scripts.py                 |  7 ++++--
 4 files changed, 52 insertions(+), 14 deletions(-)
 create mode 100644 tests/generator_scripts/source/CudaKernels.py

diff --git a/src/pystencilssfg/composer/basic_composer.py b/src/pystencilssfg/composer/basic_composer.py
index 686b60d..0466e6c 100644
--- a/src/pystencilssfg/composer/basic_composer.py
+++ b/src/pystencilssfg/composer/basic_composer.py
@@ -470,19 +470,16 @@ class SfgBasicComposer(SfgIComposer):
 
         grid_size: ExprLike
         block_size: ExprLike
-        stream: ExprLike | None
+        stream: ExprLike | None = kwargs.get("stream", None)
 
         match launch_config:
             case ManualLaunchConfiguration():
                 grid_size = kwargs["grid_size"]
                 block_size = kwargs["block_size"]
-                stream = kwargs["stream"]
 
                 return _render_invocation(grid_size, block_size, stream)
 
             case AutomaticLaunchConfiguration():
-                stream = kwargs["stream"]
-
                 grid_size_entries = [
                     self.expr_from_lambda(gs) for gs in launch_config._grid_size
                 ]
@@ -502,9 +499,9 @@ class SfgBasicComposer(SfgIComposer):
                 return SfgBlock(SfgSequence(nodes))
 
             case DynamicBlockSizeLaunchConfiguration():
-                user_block_size: ExprLike | None = kwargs["block_size"]
-                stream = kwargs["stream"]
+                user_block_size: ExprLike | None = kwargs.get("block_size", None)
 
+                block_size_init_args: tuple[ExprLike, ...]
                 if user_block_size is None:
                     if launch_config.block_size is None:
                         raise ValueError(
@@ -521,11 +518,12 @@ class SfgBasicComposer(SfgIComposer):
 
                 from ..lang.cpp import std
 
-                witem_types = [wit.return_type for wit in launch_config.num_work_items]
                 work_items_entries = [
                     self.expr_from_lambda(wit) for wit in launch_config.num_work_items
                 ]
-                work_items_var = std.tuple(*witem_types, const=True).var("__work_items")
+                work_items_var = std.tuple(
+                    "uint32_t", "uint32_t", "uint32_t", const=True
+                ).var("__work_items")
 
                 def _min(a: ExprLike, b: ExprLike):
                     return AugExpr.format("{a} < {b} ? {a} : {b}", a=a, b=b)
@@ -536,7 +534,7 @@ class SfgBasicComposer(SfgIComposer):
                 reduced_block_size_entries = [
                     _min(work_items_var.get(i), bs)
                     for i, bs in enumerate(
-                        [block_size_var.x(), block_size_var.y(), block_size_var.z()]
+                        [block_size_var.x, block_size_var.y, block_size_var.z]
                     )
                 ]
                 reduced_block_size_var = dim3(const=True).var("__reduced_block_size")
@@ -545,9 +543,9 @@ class SfgBasicComposer(SfgIComposer):
                     _div_ceil(work_items_var.get(i), bs)
                     for i, bs in enumerate(
                         [
-                            reduced_block_size_var.x(),
-                            reduced_block_size_var.y(),
-                            reduced_block_size_var.z(),
+                            reduced_block_size_var.x,
+                            reduced_block_size_var.y,
+                            reduced_block_size_var.z,
                         ]
                     )
                 ]
diff --git a/tests/generator_scripts/index.yaml b/tests/generator_scripts/index.yaml
index 1c97aaf..bfbedda 100644
--- a/tests/generator_scripts/index.yaml
+++ b/tests/generator_scripts/index.yaml
@@ -90,6 +90,20 @@ StlContainers1D:
 MdSpanFixedShapeLayouts:
 MdSpanLbStreaming:
 
+# CUDA
+
+CudaKernels:
+  sfg-args:
+    file-extensions: ["cuh", "cu"]  # presumably header / implementation extensions of the generated files - confirm
+  compile:
+    cxx: nvcc
+    cxx-flags: 
+      - -std=c++20
+      - -Werror  # nvcc's -Werror takes a warning-kind argument, given as the next list entry
+      - all-warnings  # argument of the preceding -Werror: turn all warnings into errors
+      - --expt-relaxed-constexpr  # nvcc flag relaxing host/device restrictions on constexpr functions
+    skip-if-not-found: true  # presumably skips compilation when nvcc is not installed - confirm
+
 # SYCL
 
 SyclKernels:
diff --git a/tests/generator_scripts/source/CudaKernels.py b/tests/generator_scripts/source/CudaKernels.py
new file mode 100644
index 0000000..9bd37a5
--- /dev/null
+++ b/tests/generator_scripts/source/CudaKernels.py
@@ -0,0 +1,23 @@
+"""Generator script: CUDA kernel `scale` (dst = 2 * src) with an `invoke` wrapper function."""
+
+from pystencilssfg import SourceFileGenerator
+from pystencilssfg.lang.cuda import dim3
+from pystencilssfg.lang.cpp import std
+
+import pystencils as ps
+
+# Point the `std.mdspan` wrapper at the std::experimental namespace and header
+std.mdspan.configure(namespace="std::experimental", header="<experimental/mdspan>")
+
+with SourceFileGenerator() as sfg:
+    src, dst = ps.fields("src, dst: double[3D]", layout="c")
+    scale_assignment = ps.Assignment(dst(0), 2 * src(0))
+    kernel_config = ps.CreateKernelConfig(target=ps.Target.CUDA)
+    kernel_handle = sfg.kernels.create(scale_assignment, "scale", kernel_config)
+
+    # `dim3` variable named `blockSize`, handed to the invocation as its block size
+    launch_block_size = dim3().var("blockSize")
+
+    sfg.function("invoke")(
+        *(sfg.map_field(f, std.mdspan.from_field(f)) for f in (src, dst)),
+        sfg.cuda_invoke(kernel_handle, block_size=launch_block_size),
+    )
diff --git a/tests/generator_scripts/test_generator_scripts.py b/tests/generator_scripts/test_generator_scripts.py
index 6f2ff16..8571d25 100644
--- a/tests/generator_scripts/test_generator_scripts.py
+++ b/tests/generator_scripts/test_generator_scripts.py
@@ -12,16 +12,26 @@ import shutil
 import warnings
 import subprocess
 
+from pystencils.include import get_pystencils_include_path
+
 THIS_DIR = pathlib.Path(__file__).parent
 
 DEPS_DIR = THIS_DIR / "deps"
 MDSPAN_QUAL_PATH = "mdspan-mdspan-0.6.0/include/"
 
+PYSTENCILS_RT_INCLUDE_PATH = get_pystencils_include_path()
 
 TEST_INDEX = THIS_DIR / "index.yaml"
 SOURCE_DIR = THIS_DIR / "source"
 EXPECTED_DIR = THIS_DIR / "expected"
-CXX_INCLUDE_FLAGS = ["-I", f"{DEPS_DIR}/{MDSPAN_QUAL_PATH}"]
+# One `-I` per include directory. A comma-joined path list is nvcc-specific;
+# other compilers would read it as a single directory name.
+CXX_INCLUDE_FLAGS = [
+    "-I",
+    f"{DEPS_DIR}/{MDSPAN_QUAL_PATH}",
+    "-I",
+    str(PYSTENCILS_RT_INCLUDE_PATH),
+]
 
 
 def prepare_deps():
@@ -101,7 +111,7 @@ class GenScriptTest:
         for ext in self._expected_extensions:
             fname = f"{self._name}.{ext}"
             self._expected_files.add(fname)
-            if ext in ("cpp", "cxx", "c++"):
+            if ext in ("cpp", "cxx", "c++", "cu", "hip"):
                 self._files_to_compile.append(fname)
 
         compile_descr: dict = test_description.get("compile", dict())
-- 
GitLab