Skip to content
Snippets Groups Projects
Commit 69a150ed authored by Frederik Hennig
Browse files

first CUDA genscript test - not compiling just yet

parent e8f5f8ae
1 merge request: !24 "Extend Support for CUDA and HIP kernel invocations"
Pipeline #75147 failed with stages
in 2 minutes and 6 seconds
...@@ -470,19 +470,16 @@ class SfgBasicComposer(SfgIComposer): ...@@ -470,19 +470,16 @@ class SfgBasicComposer(SfgIComposer):
grid_size: ExprLike grid_size: ExprLike
block_size: ExprLike block_size: ExprLike
stream: ExprLike | None stream: ExprLike | None = kwargs.get("stream", None)
match launch_config: match launch_config:
case ManualLaunchConfiguration(): case ManualLaunchConfiguration():
grid_size = kwargs["grid_size"] grid_size = kwargs["grid_size"]
block_size = kwargs["block_size"] block_size = kwargs["block_size"]
stream = kwargs["stream"]
return _render_invocation(grid_size, block_size, stream) return _render_invocation(grid_size, block_size, stream)
case AutomaticLaunchConfiguration(): case AutomaticLaunchConfiguration():
stream = kwargs["stream"]
grid_size_entries = [ grid_size_entries = [
self.expr_from_lambda(gs) for gs in launch_config._grid_size self.expr_from_lambda(gs) for gs in launch_config._grid_size
] ]
...@@ -502,9 +499,9 @@ class SfgBasicComposer(SfgIComposer): ...@@ -502,9 +499,9 @@ class SfgBasicComposer(SfgIComposer):
return SfgBlock(SfgSequence(nodes)) return SfgBlock(SfgSequence(nodes))
case DynamicBlockSizeLaunchConfiguration(): case DynamicBlockSizeLaunchConfiguration():
user_block_size: ExprLike | None = kwargs["block_size"] user_block_size: ExprLike | None = kwargs.get("block_size", None)
stream = kwargs["stream"]
block_size_init_args: tuple[ExprLike, ...]
if user_block_size is None: if user_block_size is None:
if launch_config.block_size is None: if launch_config.block_size is None:
raise ValueError( raise ValueError(
...@@ -521,11 +518,12 @@ class SfgBasicComposer(SfgIComposer): ...@@ -521,11 +518,12 @@ class SfgBasicComposer(SfgIComposer):
from ..lang.cpp import std from ..lang.cpp import std
witem_types = [wit.return_type for wit in launch_config.num_work_items]
work_items_entries = [ work_items_entries = [
self.expr_from_lambda(wit) for wit in launch_config.num_work_items self.expr_from_lambda(wit) for wit in launch_config.num_work_items
] ]
work_items_var = std.tuple(*witem_types, const=True).var("__work_items") work_items_var = std.tuple(
"uint32_t", "uint32_t", "uint32_t", const=True
).var("__work_items")
def _min(a: ExprLike, b: ExprLike): def _min(a: ExprLike, b: ExprLike):
return AugExpr.format("{a} < {b} ? {a} : {b}", a=a, b=b) return AugExpr.format("{a} < {b} ? {a} : {b}", a=a, b=b)
...@@ -536,7 +534,7 @@ class SfgBasicComposer(SfgIComposer): ...@@ -536,7 +534,7 @@ class SfgBasicComposer(SfgIComposer):
reduced_block_size_entries = [ reduced_block_size_entries = [
_min(work_items_var.get(i), bs) _min(work_items_var.get(i), bs)
for i, bs in enumerate( for i, bs in enumerate(
[block_size_var.x(), block_size_var.y(), block_size_var.z()] [block_size_var.x, block_size_var.y, block_size_var.z]
) )
] ]
reduced_block_size_var = dim3(const=True).var("__reduced_block_size") reduced_block_size_var = dim3(const=True).var("__reduced_block_size")
...@@ -545,9 +543,9 @@ class SfgBasicComposer(SfgIComposer): ...@@ -545,9 +543,9 @@ class SfgBasicComposer(SfgIComposer):
_div_ceil(work_items_var.get(i), bs) _div_ceil(work_items_var.get(i), bs)
for i, bs in enumerate( for i, bs in enumerate(
[ [
reduced_block_size_var.x(), reduced_block_size_var.x,
reduced_block_size_var.y(), reduced_block_size_var.y,
reduced_block_size_var.z(), reduced_block_size_var.z,
] ]
) )
] ]
......
...@@ -90,6 +90,20 @@ StlContainers1D: ...@@ -90,6 +90,20 @@ StlContainers1D:
MdSpanFixedShapeLayouts: MdSpanFixedShapeLayouts:
MdSpanLbStreaming: MdSpanLbStreaming:
# CUDA

# NOTE(review): nesting reconstructed from a flattened listing; confirm that
# `skip-if-not-found` belongs under `compile` (and not directly under
# `CudaKernels`) against the test harness that reads this index.
CudaKernels:
  sfg-args:
    # The generator is expected to emit a CUDA header/source pair.
    file-extensions: ["cuh", "cu"]
  compile:
    cxx: nvcc
    cxx-flags:
      - -std=c++20
      # nvcc takes the warning kind as a separate argument:
      # `-Werror all-warnings`, hence two consecutive list entries.
      - -Werror
      - all-warnings
      - --expt-relaxed-constexpr
    skip-if-not-found: true
# SYCL # SYCL
SyclKernels: SyclKernels:
......
from pystencilssfg import SourceFileGenerator
from pystencilssfg.lang.cuda import dim3
from pystencilssfg.lang.cpp import std
import pystencils as ps
# Use the reference mdspan implementation, which lives in the
# `std::experimental` namespace and is provided by <experimental/mdspan>.
std.mdspan.configure(namespace="std::experimental", header="<experimental/mdspan>")

with SourceFileGenerator() as sfg:
    # Two 3D double-precision fields in C layout; the kernel computes
    # dst = 2 * src at the centre cell.
    src, dst = ps.fields("src, dst: double[3D]", layout="c")
    asm = ps.Assignment(dst(0), 2 * src(0))

    # Generate the kernel for the CUDA target and register it as "scale".
    cfg = ps.CreateKernelConfig(target=ps.Target.CUDA)
    khandle = sfg.kernels.create(asm, "scale", cfg)

    # C++ variable of type dim3 named "blockSize", handed to the launch below.
    # NOTE(review): presumably this surfaces as a parameter of `invoke` -
    # confirm against the generated header.
    block_size = dim3().var("blockSize")

    # Function "invoke": map both fields from mdspans, then launch the kernel
    # with the user-supplied block size.
    sfg.function("invoke")(
        sfg.map_field(src, std.mdspan.from_field(src)),
        sfg.map_field(dst, std.mdspan.from_field(dst)),
        sfg.cuda_invoke(khandle, block_size=block_size),
    )
...@@ -12,16 +12,19 @@ import shutil ...@@ -12,16 +12,19 @@ import shutil
import warnings import warnings
import subprocess import subprocess
from pystencils.include import get_pystencils_include_path
# Locations of the generator-script test suite, all relative to this file.
THIS_DIR = pathlib.Path(__file__).parent
DEPS_DIR = THIS_DIR / "deps"
# Include directory of the mdspan 0.6.0 sources, relative to DEPS_DIR.
MDSPAN_QUAL_PATH = "mdspan-mdspan-0.6.0/include/"
# Include directory reported by pystencils via get_pystencils_include_path().
PYSTENCILS_RT_INCLUDE_PATH = get_pystencils_include_path()

TEST_INDEX = THIS_DIR / "index.yaml"
SOURCE_DIR = THIS_DIR / "source"
EXPECTED_DIR = THIS_DIR / "expected"

# One `-I <dir>` pair per include directory. A comma-separated list inside a
# single `-I` argument is read as one (nonexistent) path by GCC/Clang-style
# drivers, so each directory gets its own flag; this form is accepted by
# every compiler the suite invokes.
CXX_INCLUDE_FLAGS = [
    "-I",
    f"{DEPS_DIR}/{MDSPAN_QUAL_PATH}",
    "-I",
    f"{PYSTENCILS_RT_INCLUDE_PATH}",
]
def prepare_deps(): def prepare_deps():
...@@ -101,7 +104,7 @@ class GenScriptTest: ...@@ -101,7 +104,7 @@ class GenScriptTest:
for ext in self._expected_extensions: for ext in self._expected_extensions:
fname = f"{self._name}.{ext}" fname = f"{self._name}.{ext}"
self._expected_files.add(fname) self._expected_files.add(fname)
if ext in ("cpp", "cxx", "c++"): if ext in ("cpp", "cxx", "c++", "cu", "hip"):
self._files_to_compile.append(fname) self._files_to_compile.append(fname)
compile_descr: dict = test_description.get("compile", dict()) compile_descr: dict = test_description.get("compile", dict())
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment