From f2045b96f40e096d26a601893b0c05e82d1083ef Mon Sep 17 00:00:00 2001 From: Frederik Hennig <frederik.hennig@fau.de> Date: Tue, 9 Jul 2024 22:14:00 +0200 Subject: [PATCH] fix dimension order in threads range --- src/pystencils/backend/platforms/generic_gpu.py | 4 +++- .../kernelcreation/platform/test_gpu_platforms.py | 9 +++++++-- tests/nbackend/kernelcreation/test_domain_kernels.py | 4 ++-- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/pystencils/backend/platforms/generic_gpu.py b/src/pystencils/backend/platforms/generic_gpu.py index 1de001643..774b9405c 100644 --- a/src/pystencils/backend/platforms/generic_gpu.py +++ b/src/pystencils/backend/platforms/generic_gpu.py @@ -13,6 +13,7 @@ from .platform import Platform class GpuThreadsRange: + """Number of threads required by a GPU kernel, in order (x, y, z).""" @staticmethod def from_ispace(ispace: IterationSpace) -> GpuThreadsRange: @@ -41,6 +42,7 @@ class GpuThreadsRange: @property def num_work_items(self) -> tuple[PsExpression, ...]: + """Number of work items in (x, y, z)-order.""" return self._num_work_items @property @@ -49,7 +51,7 @@ class GpuThreadsRange: @staticmethod def _from_full_ispace(ispace: FullIterationSpace) -> GpuThreadsRange: - dimensions = ispace.dimensions_in_loop_order() + dimensions = ispace.dimensions_in_loop_order()[::-1] if len(dimensions) > 3: raise NotImplementedError( f"Cannot create a GPU threads range for an {len(dimensions)}-dimensional iteration space" diff --git a/tests/nbackend/kernelcreation/platform/test_gpu_platforms.py b/tests/nbackend/kernelcreation/platform/test_gpu_platforms.py index c1bfd28ad..da2b3a5ad 100644 --- a/tests/nbackend/kernelcreation/platform/test_gpu_platforms.py +++ b/tests/nbackend/kernelcreation/platform/test_gpu_platforms.py @@ -28,10 +28,15 @@ def test_thread_range(platform_class, layout): assert threads_range.dim == dim - loop_order = archetype_field.layout + match layout: + case "fzyx" | "zyxf" | "f": + indexing_order = [0, 1, 2] + 
case "c": + indexing_order = [2, 1, 0] for i in range(dim): - coordinate = loop_order[i] + # Fastest to slowest coordinate + coordinate = indexing_order[i] dimension = ispace.dimensions[coordinate] witems = threads_range.num_work_items[i] desired = dimension.stop - dimension.start diff --git a/tests/nbackend/kernelcreation/test_domain_kernels.py b/tests/nbackend/kernelcreation/test_domain_kernels.py index 9a1b36638..5850c94d7 100644 --- a/tests/nbackend/kernelcreation/test_domain_kernels.py +++ b/tests/nbackend/kernelcreation/test_domain_kernels.py @@ -30,7 +30,7 @@ def test_filter_kernel(target): ast = create_kernel(asms, gen_config) kernel = ast.compile() - src_arr = xp.ones((42, 42)) + src_arr = xp.ones((42, 31)) dst_arr = xp.zeros_like(src_arr) kernel(src=src_arr, dst=dst_arr, weight=2.0) @@ -55,7 +55,7 @@ def test_filter_kernel_fixedsize(target): [1, 1, 1] ] - src_arr = xp.ones((42, 42)) + src_arr = xp.ones((42, 31)) dst_arr = xp.zeros_like(src_arr) src = Field.create_from_numpy_array("src", src_arr) -- GitLab