From 69a150edeeeec3b618ba1b70fef296db245dc5c4 Mon Sep 17 00:00:00 2001
From: Frederik Hennig <frederik.hennig@fau.de>
Date: Wed, 5 Mar 2025 16:43:52 +0100
Subject: [PATCH] first CUDA genscript test - not compiling just yet

---
Review note: CXX_INCLUDE_FLAGS passes every include directory through its own
`-I` flag. A single `-I dirA,dirB` argument is only understood by nvcc; host
compilers such as g++/clang++ would look for one directory literally named
"dirA,dirB", which would hide the mdspan headers from the non-CUDA tests.

 src/pystencilssfg/composer/basic_composer.py  | 22 ++++++++----------
 tests/generator_scripts/index.yaml            | 14 +++++++++++
 tests/generator_scripts/source/CudaKernels.py | 23 +++++++++++++++++++
 .../test_generator_scripts.py                 |  7 ++++--
 4 files changed, 52 insertions(+), 14 deletions(-)
 create mode 100644 tests/generator_scripts/source/CudaKernels.py

diff --git a/src/pystencilssfg/composer/basic_composer.py b/src/pystencilssfg/composer/basic_composer.py
index 686b60d..0466e6c 100644
--- a/src/pystencilssfg/composer/basic_composer.py
+++ b/src/pystencilssfg/composer/basic_composer.py
@@ -470,19 +470,16 @@ class SfgBasicComposer(SfgIComposer):
 
         grid_size: ExprLike
         block_size: ExprLike
-        stream: ExprLike | None
+        stream: ExprLike | None = kwargs.get("stream", None)
 
         match launch_config:
             case ManualLaunchConfiguration():
                 grid_size = kwargs["grid_size"]
                 block_size = kwargs["block_size"]
-                stream = kwargs["stream"]
 
                 return _render_invocation(grid_size, block_size, stream)
 
             case AutomaticLaunchConfiguration():
-                stream = kwargs["stream"]
-
                 grid_size_entries = [
                     self.expr_from_lambda(gs) for gs in launch_config._grid_size
                 ]
@@ -502,9 +499,9 @@ class SfgBasicComposer(SfgIComposer):
                 return SfgBlock(SfgSequence(nodes))
 
             case DynamicBlockSizeLaunchConfiguration():
-                user_block_size: ExprLike | None = kwargs["block_size"]
-                stream = kwargs["stream"]
+                user_block_size: ExprLike | None = kwargs.get("block_size", None)
 
+                block_size_init_args: tuple[ExprLike, ...]
                 if user_block_size is None:
                     if launch_config.block_size is None:
                         raise ValueError(
@@ -521,11 +518,12 @@ class SfgBasicComposer(SfgIComposer):
 
                 from ..lang.cpp import std
 
-                witem_types = [wit.return_type for wit in launch_config.num_work_items]
                 work_items_entries = [
                     self.expr_from_lambda(wit) for wit in launch_config.num_work_items
                 ]
-                work_items_var = std.tuple(*witem_types, const=True).var("__work_items")
+                work_items_var = std.tuple(
+                    "uint32_t", "uint32_t", "uint32_t", const=True
+                ).var("__work_items")
 
                 def _min(a: ExprLike, b: ExprLike):
                     return AugExpr.format("{a} < {b} ? {a} : {b}", a=a, b=b)
@@ -536,7 +534,7 @@ class SfgBasicComposer(SfgIComposer):
                 reduced_block_size_entries = [
                     _min(work_items_var.get(i), bs)
                     for i, bs in enumerate(
-                        [block_size_var.x(), block_size_var.y(), block_size_var.z()]
+                        [block_size_var.x, block_size_var.y, block_size_var.z]
                     )
                 ]
                 reduced_block_size_var = dim3(const=True).var("__reduced_block_size")
@@ -545,9 +543,9 @@ class SfgBasicComposer(SfgIComposer):
                     _div_ceil(work_items_var.get(i), bs)
                     for i, bs in enumerate(
                         [
-                            reduced_block_size_var.x(),
-                            reduced_block_size_var.y(),
-                            reduced_block_size_var.z(),
+                            reduced_block_size_var.x,
+                            reduced_block_size_var.y,
+                            reduced_block_size_var.z,
                         ]
                     )
                 ]
diff --git a/tests/generator_scripts/index.yaml b/tests/generator_scripts/index.yaml
index 1c97aaf..bfbedda 100644
--- a/tests/generator_scripts/index.yaml
+++ b/tests/generator_scripts/index.yaml
@@ -90,6 +90,20 @@ StlContainers1D:
 MdSpanFixedShapeLayouts:
 MdSpanLbStreaming:
 
+# CUDA
+
+CudaKernels:
+  sfg-args:
+    file-extensions: ["cuh", "cu"]
+  compile:
+    cxx: nvcc
+    cxx-flags:
+      - -std=c++20
+      - -Werror
+      - all-warnings
+      - --expt-relaxed-constexpr
+    skip-if-not-found: true
+
 # SYCL
 
 SyclKernels:
diff --git a/tests/generator_scripts/source/CudaKernels.py b/tests/generator_scripts/source/CudaKernels.py
new file mode 100644
index 0000000..9bd37a5
--- /dev/null
+++ b/tests/generator_scripts/source/CudaKernels.py
@@ -0,0 +1,23 @@
+from pystencilssfg import SourceFileGenerator
+from pystencilssfg.lang.cuda import dim3
+from pystencilssfg.lang.cpp import std
+
+import pystencils as ps
+
+std.mdspan.configure(namespace="std::experimental", header="<experimental/mdspan>")
+
+with SourceFileGenerator() as sfg:
+
+    src, dst = ps.fields("src, dst: double[3D]", layout="c")
+    asm = ps.Assignment(dst(0), 2 * src(0))
+    cfg = ps.CreateKernelConfig(target=ps.Target.CUDA)
+
+    khandle = sfg.kernels.create(asm, "scale", cfg)
+
+    block_size = dim3().var("blockSize")
+
+    sfg.function("invoke")(
+        sfg.map_field(src, std.mdspan.from_field(src)),
+        sfg.map_field(dst, std.mdspan.from_field(dst)),
+        sfg.cuda_invoke(khandle, block_size=block_size)
+    )
diff --git a/tests/generator_scripts/test_generator_scripts.py b/tests/generator_scripts/test_generator_scripts.py
index 6f2ff16..8571d25 100644
--- a/tests/generator_scripts/test_generator_scripts.py
+++ b/tests/generator_scripts/test_generator_scripts.py
@@ -12,16 +12,19 @@ import shutil
 import warnings
 import subprocess
 
+from pystencils.include import get_pystencils_include_path
+
 THIS_DIR = pathlib.Path(__file__).parent
 
 DEPS_DIR = THIS_DIR / "deps"
 MDSPAN_QUAL_PATH = "mdspan-mdspan-0.6.0/include/"
+PYSTENCILS_RT_INCLUDE_PATH = get_pystencils_include_path()
 
 TEST_INDEX = THIS_DIR / "index.yaml"
 SOURCE_DIR = THIS_DIR / "source"
 EXPECTED_DIR = THIS_DIR / "expected"
 
-CXX_INCLUDE_FLAGS = ["-I", f"{DEPS_DIR}/{MDSPAN_QUAL_PATH}"]
+CXX_INCLUDE_FLAGS = ["-I", f"{DEPS_DIR}/{MDSPAN_QUAL_PATH}", "-I", f"{PYSTENCILS_RT_INCLUDE_PATH}"]
 
 
 def prepare_deps():
@@ -101,7 +104,7 @@ class GenScriptTest:
         for ext in self._expected_extensions:
             fname = f"{self._name}.{ext}"
             self._expected_files.add(fname)
-            if ext in ("cpp", "cxx", "c++"):
+            if ext in ("cpp", "cxx", "c++", "cu", "hip"):
                 self._files_to_compile.append(fname)
 
         compile_descr: dict = test_description.get("compile", dict())
-- 
GitLab