From a52c20a48077d255bc2e9d817b564ab11db0739d Mon Sep 17 00:00:00 2001 From: Frederik Hennig <frederik.hennig@fau.de> Date: Tue, 11 Mar 2025 11:17:57 +0100 Subject: [PATCH 1/2] pad work items tuple with ones in linear3d for rank < 3 --- src/pystencils/codegen/gpu_indexing.py | 5 +++ tests/kernelcreation/test_gpu.py | 48 +++++++++++++++++++++++++- 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/src/pystencils/codegen/gpu_indexing.py b/src/pystencils/codegen/gpu_indexing.py index c93f0f959..27d6fc817 100644 --- a/src/pystencils/codegen/gpu_indexing.py +++ b/src/pystencils/codegen/gpu_indexing.py @@ -267,6 +267,11 @@ class GpuIndexing: f" for a {rank}-dimensional kernel." ) + work_items_expr += tuple( + self._ast_factory.parse_index(1) + for _ in range(3 - rank) + ) + num_work_items = cast( _Dim3Lambda, tuple(self._kernel_factory.create_lambda(wit) for wit in work_items_expr), diff --git a/tests/kernelcreation/test_gpu.py b/tests/kernelcreation/test_gpu.py index 10b37e610..45acaf5f8 100644 --- a/tests/kernelcreation/test_gpu.py +++ b/tests/kernelcreation/test_gpu.py @@ -31,7 +31,7 @@ except ImportError: @pytest.mark.parametrize("indexing_scheme", ["linear3d", "blockwise4d"]) @pytest.mark.parametrize("omit_range_check", [False, True]) @pytest.mark.parametrize("manual_grid", [False, True]) -def test_indexing_options( +def test_indexing_options_3d( indexing_scheme: str, omit_range_check: bool, manual_grid: bool ): src, dst = fields("src, dst: [3D]") @@ -76,6 +76,52 @@ def test_indexing_options( cp.testing.assert_allclose(dst_arr, expected) +@pytest.mark.parametrize("indexing_scheme", ["linear3d", "blockwise4d"]) +@pytest.mark.parametrize("omit_range_check", [False, True]) +@pytest.mark.parametrize("manual_grid", [False, True]) +def test_indexing_options_2d( + indexing_scheme: str, omit_range_check: bool, manual_grid: bool +): + src, dst = fields("src, dst: [2D]") + asm = Assignment( + dst.center(), + src[-1, 0] + + src[1, 0] + + src[0, -1] + + src[0, 1] + ) + + cfg = CreateKernelConfig(target=Target.CUDA) + cfg.gpu.indexing_scheme = indexing_scheme + cfg.gpu.omit_range_check = omit_range_check + cfg.gpu.manual_launch_grid = manual_grid + + ast = create_kernel(asm, cfg) + kernel = ast.compile() + + src_arr = cp.ones((18, 34)) + dst_arr = cp.zeros_like(src_arr) + + if manual_grid: + match indexing_scheme: + case "linear3d": + kernel.launch_config.block_size = (10, 8, 1) + kernel.launch_config.grid_size = (4, 4, 1) + case "blockwise4d": + kernel.launch_config.block_size = (40, 1, 1) + kernel.launch_config.grid_size = (32, 1, 1) + + elif indexing_scheme == "linear3d": + kernel.launch_config.block_size = (10, 8, 1) + + kernel(src=src_arr, dst=dst_arr) + + expected = cp.zeros_like(src_arr) + expected[1:-1, 1:-1].fill(4.0) + + cp.testing.assert_allclose(dst_arr, expected) + + def test_invalid_indexing_schemes(): src, dst = fields("src, dst: [4D]") asm = Assignment(src.center(0), dst.center(0)) -- GitLab From 466e36021c72926f5fa2659b2cb2eedbd53068e9 Mon Sep 17 00:00:00 2001 From: Frederik Hennig <frederik.hennig@fau.de> Date: Tue, 11 Mar 2025 11:20:42 +0100 Subject: [PATCH 2/2] fix 2d indexing test --- tests/kernelcreation/test_gpu.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/kernelcreation/test_gpu.py b/tests/kernelcreation/test_gpu.py index 45acaf5f8..f1905b1fc 100644 --- a/tests/kernelcreation/test_gpu.py +++ b/tests/kernelcreation/test_gpu.py @@ -99,17 +99,17 @@ def test_indexing_options_2d( ast = create_kernel(asm, cfg) kernel = ast.compile() - src_arr = cp.ones((18, 34)) + src_arr = cp.ones((18, 42)) dst_arr = cp.zeros_like(src_arr) if manual_grid: match indexing_scheme: case "linear3d": kernel.launch_config.block_size = (10, 8, 1) - kernel.launch_config.grid_size = (4, 4, 1) + kernel.launch_config.grid_size = (4, 2, 1) case "blockwise4d": kernel.launch_config.block_size = (40, 1, 1) - kernel.launch_config.grid_size = (32, 1, 1) + kernel.launch_config.grid_size = (16, 1, 1) elif indexing_scheme == "linear3d": kernel.launch_config.block_size = (10, 8, 1) -- GitLab