diff --git a/src/pystencils/codegen/__init__.py b/src/pystencils/codegen/__init__.py
index d06c18382bbc21a7bd9a9a41849d586f88215814..1b2cd2ffb6256ecd2dbc287e5f4f4c7a3a723dc4 100644
--- a/src/pystencils/codegen/__init__.py
+++ b/src/pystencils/codegen/__init__.py
@@ -7,6 +7,7 @@ from .parameters import Parameter
 from .kernel import Kernel, GpuKernel
 from .driver import create_kernel, get_driver
 from .functions import Lambda
+from .errors import CodegenError
 
 __all__ = [
     "Target",
@@ -18,4 +19,5 @@ __all__ = [
     "Lambda",
     "create_kernel",
     "get_driver",
+    "CodegenError",
 ]
diff --git a/src/pystencils/codegen/gpu_indexing.py b/src/pystencils/codegen/gpu_indexing.py
index afd2958c16161b869b9caf69785c3eac9f287949..2d22ec624856d9cf8a0b825845fee04caaa4ee74 100644
--- a/src/pystencils/codegen/gpu_indexing.py
+++ b/src/pystencils/codegen/gpu_indexing.py
@@ -131,7 +131,7 @@ class ManualLaunchConfiguration(GpuLaunchConfiguration):
 
 class DynamicBlockSizeLaunchConfiguration(GpuLaunchConfiguration):
     """GPU launch configuration that permits the user to set a block size and dynamically computes the grid size.
-    
+
     The actual launch grid size is computed from the user-defined ``user_block_size`` and the number of work items
     in the kernel's iteration space as follows.
     For each dimension :math:`c \\in \\{ x, y, z \\}`,
@@ -201,7 +201,7 @@ class DynamicBlockSizeLaunchConfiguration(GpuLaunchConfiguration):
 
 class GpuIndexing:
     """Factory for GPU indexing objects required during code generation.
-    
+
     This class acts as a helper class for the code generation driver.
     It produces both the `ThreadMapping` required by the backend,
     as well as factories for the launch configuration required later by the runtime system.
@@ -259,6 +259,12 @@ class GpuIndexing:
         work_items_expr = self._get_work_items()
         rank = len(work_items_expr)
 
+        if rank > 3:
+            raise CodegenError(
+                "Cannot create a launch grid configuration using the Linear3D indexing scheme"
+                f" for a {rank}-dimensional kernel."
+            )
+
         num_work_items = cast(
             _Dim3Lambda,
             tuple(Lambda.from_expression(self._ctx, wit) for wit in work_items_expr),
@@ -328,10 +334,6 @@ class GpuIndexing:
         match ispace:
             case FullIterationSpace():
                 dimensions = ispace.dimensions_in_loop_order()[::-1]
-                if len(dimensions) > 3:
-                    raise NotImplementedError(
-                        f"Cannot create a GPU threads range for an {len(dimensions)}-dimensional iteration space"
-                    )
 
                 from ..backend.ast.analysis import collect_undefined_symbols as collect
 
diff --git a/tests/kernelcreation/test_gpu.py b/tests/kernelcreation/test_gpu.py
index 75239c9b10c404c6acf88c61908293c578623ba6..10b37e610cebd23c9fc961f14118aee5f24582c4 100644
--- a/tests/kernelcreation/test_gpu.py
+++ b/tests/kernelcreation/test_gpu.py
@@ -11,7 +11,6 @@ from pystencils import (
     CreateKernelConfig,
     create_kernel,
     Target,
-    assignment_from_stencil,
 )
 
 from pystencils.slicing import (
@@ -77,6 +76,17 @@ def test_indexing_options(
     cp.testing.assert_allclose(dst_arr, expected)
 
 
+def test_invalid_indexing_schemes():
+    src, dst = fields("src, dst: [4D]")
+    asm = Assignment(src.center(0), dst.center(0))
+
+    cfg = CreateKernelConfig(target=Target.CUDA)
+    cfg.gpu.indexing_scheme = "linear3d"
+
+    with pytest.raises(Exception):
+        create_kernel(asm, cfg)
+
+
 def test_averaging_kernel():
     size = (40, 55)
     src_arr = np.random.rand(*size)
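
To illustrate the behavior change outside of pytest, here is a minimal sketch of how the newly exported ``CodegenError`` surfaces for a 4-dimensional kernel under the Linear3D indexing scheme. All API names are taken from the test added in this patch; that the error propagates out of ``create_kernel`` (rather than only at launch time) is assumed from the new test's expectation.

```python
# Minimal sketch (not part of the patch): triggering the new CodegenError
# for a 4D iteration space with the Linear3D indexing scheme.
from pystencils import (
    Assignment,
    CreateKernelConfig,
    Target,
    create_kernel,
    fields,
)
from pystencils.codegen import CodegenError  # newly exported by this patch

src, dst = fields("src, dst: [4D]")
asm = Assignment(src.center(0), dst.center(0))

cfg = CreateKernelConfig(target=Target.CUDA)
cfg.gpu.indexing_scheme = "linear3d"

try:
    create_kernel(asm, cfg)
except CodegenError as err:
    # The check that previously raised NotImplementedError deep in the
    # thread-mapping code now raises CodegenError while setting up the
    # launch grid configuration.
    print(f"Rejected as expected: {err}")
```

Since ``CodegenError`` is now part of the public ``pystencils.codegen`` namespace, the new test could arguably assert ``pytest.raises(CodegenError)`` instead of the broad ``pytest.raises(Exception)``; the patch as written keeps the broader check.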