diff --git a/.gitignore b/.gitignore index 65c40647dfc694cf861ac9f1ea2ffb05de4d12f2..399dabec684a263e78c3fad0521f84f3807e5185 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ # dev environment **/.venv +**/venv # build artifacts dist @@ -22,4 +23,4 @@ htmlcov coverage.xml # mkdocs -site \ No newline at end of file +site diff --git a/integration/test_sycl_buffer.py b/integration/test_sycl_buffer.py new file mode 100644 index 0000000000000000000000000000000000000000..a1e52eb85af599814a2192b726afd4aae2fd084d --- /dev/null +++ b/integration/test_sycl_buffer.py @@ -0,0 +1,33 @@ +from pystencils import Target, CreateKernelConfig, no_jit +from lbmpy import create_lb_update_rule, LBMOptimisation +from pystencilssfg import SourceFileGenerator, SfgConfig, OutputMode +from pystencilssfg.lang.cpp.sycl_accessor import sycl_accessor_ref +import pystencilssfg.extensions.sycl as sycl +from itertools import chain + +sfg_config = SfgConfig( + output_directory="out/test_sycl_buffer", + outer_namespace="gen_code", + output_mode=OutputMode.INLINE, +) + +with SourceFileGenerator(sfg_config) as sfg: + sfg = sycl.SyclComposer(sfg) + gen_config = CreateKernelConfig(target=Target.SYCL, jit=no_jit) + opt = LBMOptimisation(field_layout="fzyx") + update = create_lb_update_rule(lbm_optimisation=opt) + kernel = sfg.kernels.create(update, "lbm_update", gen_config) + + cgh = sfg.sycl_handler("handler") + rang = sfg.sycl_range(update.method.dim, "range") + mappings = [ + sfg.map_field(field, sycl_accessor_ref(field)) + for field in chain(update.free_fields, update.bound_fields) + ] + + sfg.function("lb_update")( + cgh.parallel_for(rang)( + *mappings, + sfg.call(kernel), + ), + ) diff --git a/src/pystencilssfg/extensions/sycl.py b/src/pystencilssfg/extensions/sycl.py index 3cb0c1c5e50aa2b9557a176f3c541283641ad530..2f3591cf700adc20dbe810a0ec3ffe07a89b4a29 100644 --- a/src/pystencilssfg/extensions/sycl.py +++ b/src/pystencilssfg/extensions/sycl.py @@ -6,6 +6,8 @@ import re from pystencils.types import PsType, PsCustomType from pystencils.enums import Target +from pystencilssfg.composer.basic_composer import SequencerArg + from ..exceptions import SfgException from ..context import SfgContext from ..composer import ( @@ -13,6 +15,7 @@ from ..composer import ( SfgClassComposer, SfgComposer, SfgComposerMixIn, + make_sequence, ) from ..ir.source_components import SfgKernelHandle, SfgHeaderInclude from ..ir import ( @@ -56,20 +59,32 @@ class SyclHandler(AugExpr): self._ctx = ctx - def parallel_for(self, range: SfgVar | Sequence[int], kernel: SfgKernelHandle): + def parallel_for( + self, + range: SfgVar | Sequence[int], + ): """Generate a ``parallel_for`` kernel invocation using this command group handler. + The syntax of this uses a chain of two calls to mimic C++ syntax: + + .. code-block:: Python + + sfg.parallel_for(range)( + # Body + ) + + The body is constructed via sequencing (see `make_sequence`). Args: range: Object, or tuple of integers, indicating the kernel's iteration range - kernel: Handle to the pystencils-kernel to be executed """ self._ctx.add_include(SfgHeaderInclude("sycl/sycl.hpp", system_header=True)) - kfunc = kernel.get_kernel_function() - if kfunc.target != Target.SYCL: - raise SfgException( - f"Kernel given to `parallel_for` is no SYCL kernel: {kernel.kernel_name}" - ) + def check_kernel(kernel: SfgKernelHandle): + kfunc = kernel.get_kernel_function() + if kfunc.target != Target.SYCL: + raise SfgException( + f"Kernel given to `parallel_for` is no SYCL kernel: {kernel.kernel_name}" + ) id_regex = re.compile(r"sycl::(id|item|nd_item)<\s*[0-9]\s*>") @@ -79,12 +94,25 @@ class SyclHandler(AugExpr): and id_regex.search(param.dtype.c_string()) is not None ) - id_param = list(filter(filter_id, kernel.scalar_parameters))[0] - - tree = SfgKernelCallNode(kernel) + def sequencer(*args: SequencerArg): + id_param = [] + for arg in args: + if isinstance(arg, SfgKernelCallNode): + check_kernel(arg._kernel_handle) + id_param.append(list(filter(filter_id, arg._kernel_handle.scalar_parameters))[0]) + + if not all(item == id_param[0] for item in id_param): + raise ValueError( + "id_param should be the same for all kernels in parallel_for" + ) + tree = make_sequence(*args) + + kernel_lambda = SfgLambda(("=",), (id_param[0],), tree, None) + return SyclKernelInvoke( + self, SyclInvokeType.ParallelFor, range, kernel_lambda + ) - kernel_lambda = SfgLambda(("=",), (id_param,), tree, None) - return SyclKernelInvoke(self, SyclInvokeType.ParallelFor, range, kernel_lambda) + return sequencer class SyclGroup(AugExpr): diff --git a/src/pystencilssfg/lang/cpp/sycl_accessor.py b/src/pystencilssfg/lang/cpp/sycl_accessor.py new file mode 100644 index 0000000000000000000000000000000000000000..f704477b3a92925acf7e318095c8fb67af32cfb2 --- /dev/null +++ b/src/pystencilssfg/lang/cpp/sycl_accessor.py @@ -0,0 +1,79 @@ +from ...lang import SrcField, IFieldExtraction +from ...ir.source_components import SfgHeaderInclude + +from pystencils import Field +from pystencils.types import ( + PsType, + PsCustomType, +) + +from pystencilssfg.lang.expressions import AugExpr + + +class SyclAccessor(SrcField): + def __init__( + self, + T: PsType, + dimensions: int, + reference: bool = False, + ): + cpp_typestr = T.c_string() + if 3 < dimensions: + raise ValueError("sycl accessors can only have dims 1, 2 or 3") + typestring = ( + f"sycl::accessor< {cpp_typestr}, {dimensions} > {'&' if reference else ''}" + ) + super().__init__(PsCustomType(typestring)) + self._dim = dimensions + self._inner_stride = 1 + + @property + def required_includes(self) -> set[SfgHeaderInclude]: + return {SfgHeaderInclude("sycl/sycl.hpp", system_header=True)} + + def get_extraction(self) -> IFieldExtraction: + accessor = self + + class Extraction(IFieldExtraction): + def ptr(self) -> AugExpr: + return AugExpr.format( + "{}.get_multi_ptr<sycl::access::decorated::no>().get()", + accessor, + ) + + def size(self, coordinate: int) -> AugExpr | None: + if coordinate > accessor._dim: + return None + else: + return AugExpr.format( + "{}.get_range().get({})", accessor, coordinate + ) + + def stride(self, coordinate: int) -> AugExpr | None: + if coordinate > accessor._dim: + return None + elif coordinate == accessor._dim - 1: + return AugExpr.format("{}", accessor._inner_stride) + else: + exprs = [] + args = [] + for d in range(coordinate + 1, accessor._dim): + args.extend([accessor, d]) + exprs.append("{}.get_range().get({})") + expr = " * ".join(exprs) + expr += " * {}" + return AugExpr.format(expr, *args, accessor._inner_stride) + + return Extraction() + + +def sycl_accessor_ref(field: Field): + """Creates a `sycl::accessor &` for a given pystencils field.""" + # Sycl Accessor do not expose information about strides, so the linearization is like here + # https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#_multi_dimensional_objects_and_linearization + + return SyclAccessor( + field.dtype, + field.spatial_dimensions + field.index_dimensions, + reference=True, + ).var(field.name) diff --git a/tests/extensions/test_sycl.py b/tests/extensions/test_sycl.py new file mode 100644 index 0000000000000000000000000000000000000000..db99278c3a00a333400a7a18882163676c389d00 --- /dev/null +++ b/tests/extensions/test_sycl.py @@ -0,0 +1,64 @@ +import pytest +from pystencilssfg import SourceFileGenerator +import pystencilssfg.extensions.sycl as sycl +import pystencils as ps +from pystencilssfg import SfgContext + + +def test_parallel_for_1_kernels(): + sfg = sycl.SyclComposer(SfgContext()) + data_type = "double" + dim = 2 + f, g, h, i = ps.fields(f"f,g,h,i:{data_type}[{dim}D]") + assignement_1 = ps.Assignment(f.center(), g.center()) + assignement_2 = ps.Assignment(h.center(), i.center()) + + config = ps.CreateKernelConfig(target=ps.Target.SYCL) + kernel_1 = sfg.kernels.create(assignement_1, "kernel_1", config) + kernel_2 = sfg.kernels.create(assignement_2, "kernel_2", config) + cgh = sfg.sycl_handler("handler") + rang = sfg.sycl_range(dim, "range") + cgh.parallel_for(rang)( + sfg.call(kernel_1), + sfg.call(kernel_2), + ) + + +def test_parallel_for_2_kernels(): + sfg = sycl.SyclComposer(SfgContext()) + data_type = "double" + dim = 2 + f, g, h, i = ps.fields(f"f,g,h,i:{data_type}[{dim}D]") + assignement_1 = ps.Assignment(f.center(), g.center()) + assignement_2 = ps.Assignment(h.center(), i.center()) + + config = ps.CreateKernelConfig(target=ps.Target.SYCL) + kernel_1 = sfg.kernels.create(assignement_1, "kernel_1", config) + kernel_2 = sfg.kernels.create(assignement_2, "kernel_2", config) + cgh = sfg.sycl_handler("handler") + rang = sfg.sycl_range(dim, "range") + cgh.parallel_for(rang)( + sfg.call(kernel_1), + sfg.call(kernel_2), + ) + + +def test_parallel_for_2_kernels_fail(): + sfg = sycl.SyclComposer(SfgContext()) + data_type = "double" + dim = 2 + f, g = ps.fields(f"f,g:{data_type}[{dim}D]") + h, i = ps.fields(f"h,i:{data_type}[{dim-1}D]") + assignement_1 = ps.Assignment(f.center(), g.center()) + assignement_2 = ps.Assignment(h.center(), i.center()) + + config = ps.CreateKernelConfig(target=ps.Target.SYCL) + kernel_1 = sfg.kernels.create(assignement_1, "kernel_1", config) + kernel_2 = sfg.kernels.create(assignement_2, "kernel_2", config) + cgh = sfg.sycl_handler("handler") + rang = sfg.sycl_range(dim, "range") + with pytest.raises(ValueError): + cgh.parallel_for(rang)( + sfg.call(kernel_1), + sfg.call(kernel_2), + ) diff --git a/tests/generator_scripts/scripts/TestSyclBuffer.py b/tests/generator_scripts/scripts/TestSyclBuffer.py new file mode 100644 index 0000000000000000000000000000000000000000..d041756a08f07f74815366a09b79310eb2c5cca6 --- /dev/null +++ b/tests/generator_scripts/scripts/TestSyclBuffer.py @@ -0,0 +1,35 @@ +import pystencils as ps +import sympy as sp +from pystencilssfg import SourceFileGenerator +from pystencilssfg.lang.cpp.sycl_accessor import sycl_accessor_ref +import pystencilssfg.extensions.sycl as sycl + + +with SourceFileGenerator() as sfg: + sfg = sycl.SyclComposer(sfg) + + u_src, u_dst, f = ps.fields("u_src, u_dst, f : double[2D]", layout="fzyx") + h = sp.Symbol("h") + + jacobi_update = [ + ps.Assignment( + u_dst.center(), + (h**2 * f[0, 0] + u_src[1, 0] + u_src[-1, 0] + u_src[0, 1] + u_src[0, -1]) + / 4, + ) + ] + + kernel_config = ps.CreateKernelConfig(target=ps.Target.SYCL) + jacobi_kernel = sfg.kernels.create(jacobi_update, config=kernel_config) + + cgh = sfg.sycl_handler("handler") + rang = sfg.sycl_range(2, "range") + mappings = [ + sfg.map_field(u_src, sycl_accessor_ref(u_src)), + sfg.map_field(u_dst, sycl_accessor_ref(u_dst)), + sfg.map_field(f, sycl_accessor_ref(f)), + ] + + sfg.function("jacobiUpdate")( + cgh.parallel_for(rang)(*mappings, sfg.call(jacobi_kernel)), + ) diff --git a/tests/generator_scripts/test_generator_scripts.py b/tests/generator_scripts/test_generator_scripts.py index 16adba2ec1ff9731f799609a5635cc49f3de5a0f..316e0e0f5a57aeb80f6c13ed23f39536ca1209ef 100644 --- a/tests/generator_scripts/test_generator_scripts.py +++ b/tests/generator_scripts/test_generator_scripts.py @@ -74,7 +74,7 @@ SCRIPTS = [ "--sfg-file-extensionss", ".c++,.h++", ), - should_fail=True + should_fail=True, ), ScriptInfo.make( "TestExtraCommandLineArgs", @@ -85,13 +85,19 @@ SCRIPTS = [ "--precision", "float32", "test1", - "test2" + "test2", ), ), ScriptInfo.make("Structural", ("hpp", "cpp")), ScriptInfo.make("SimpleJacobi", ("hpp", "cpp"), compilable_output="cpp"), ScriptInfo.make("SimpleClasses", ("hpp", "cpp")), ScriptInfo.make("Variables", ("hpp", "cpp"), compilable_output="cpp"), + ScriptInfo.make( + "TestSyclBuffer", + ("hpp", "cpp"), + compilable_output="cpp" if shutil.which("icpx") else None, + compile_cmd="icpx -fsycl -std=c++20" if shutil.which("icpx") else "", + ), ] @@ -113,13 +119,17 @@ def test_generator_script(script_info: ScriptInfo): shutil.rmtree(output_dir) os.makedirs(output_dir, exist_ok=True) - args = ["python", script_file, "--sfg-output-dir", output_dir] + list(script_info.args) + args = ["python", script_file, "--sfg-output-dir", output_dir] + list( + script_info.args + ) result = subprocess.run(args) if script_info.should_fail: if result.returncode == 0: - pytest.fail(f"Generator script {script_name} was supposed to fail, but didn't.") + pytest.fail( + f"Generator script {script_name} was supposed to fail, but didn't." + ) return if result.returncode != 0: diff --git a/tests/lang/test_sycl_accessor.py b/tests/lang/test_sycl_accessor.py new file mode 100644 index 0000000000000000000000000000000000000000..d2af2950a1bd2edf31f2da7086a29b8329339b00 --- /dev/null +++ b/tests/lang/test_sycl_accessor.py @@ -0,0 +1,36 @@ +import pytest + + +from pystencilssfg.lang.cpp.sycl_accessor import sycl_accessor_ref +import pystencils as ps + + +@pytest.mark.parametrize("data_type", ["double", "float"]) +@pytest.mark.parametrize("dim", [1, 2, 3]) +def test_spatial_field(data_type, dim): + f = ps.fields(f"f:{data_type}[{dim}D]") + ref = sycl_accessor_ref(f) + assert f"sycl::accessor< {data_type}, {dim} > &" in str(ref.get_dtype()) + + +@pytest.mark.parametrize("data_type", ["double", "float"]) +def test_too_large_dim(data_type): + dim = 4 + f = ps.fields(f"f:{data_type}[{dim}D]") + with pytest.raises(ValueError): + sycl_accessor_ref(f) + + +@pytest.mark.parametrize("data_type", ["double", "float"]) +@pytest.mark.parametrize("spatial_dim", [1, 2, 3]) +@pytest.mark.parametrize("index_dims", [1, 2, 3]) +def test_index_field(data_type, spatial_dim, index_dims): + index_shape = ("19",) * index_dims + total_dims = spatial_dim + index_dims + f = ps.fields(f"f({', '.join(index_shape)}):{data_type}[{spatial_dim}D]") + if total_dims <= 3: + ref = sycl_accessor_ref(f) + assert f"sycl::accessor< {data_type}, {total_dims} > &" in str(ref.get_dtype()) + else: + with pytest.raises(ValueError): + sycl_accessor_ref(f)