From 9e7299033af893db0fd38a0d64e88493fd536476 Mon Sep 17 00:00:00 2001
From: Frederik Hennig <frederik.hennig@fau.de>
Date: Tue, 18 Mar 2025 16:07:13 +0000
Subject: [PATCH] Update GPU tests; add HIP defaults for warp size and block limits

---
 conftest.py                            | 14 +++--
 src/pystencils/codegen/config.py       |  4 +-
 src/pystencils/codegen/driver.py       |  2 +
 src/pystencils/codegen/gpu_indexing.py | 35 +++++++----
 tests/kernelcreation/test_gpu.py       | 84 +++++++++++++++-----------
 5 files changed, 86 insertions(+), 53 deletions(-)

diff --git a/conftest.py b/conftest.py
index ff0467eff..7ea8f5ba0 100644
--- a/conftest.py
+++ b/conftest.py
@@ -43,10 +43,16 @@ def add_path_to_ignore(path):
     ]
 
 
-collect_ignore = [
-    os.path.join(SCRIPT_FOLDER, "doc", "conf.py"),
-    os.path.join(SCRIPT_FOLDER, "src", "pystencils", "opencl", "opencl.autoinit"),
-]
+def ignore_file(fp):
+    # Register a single file (relative to the repository root) for pytest to skip
+    collect_ignore.append(os.path.join(SCRIPT_FOLDER, fp))
+
+
+collect_ignore = []
+
+ignore_file("noxfile.py")
+ignore_file("docs/source/conf.py")
+add_path_to_ignore("docs/build")
 add_path_to_ignore("tests/benchmark")
 add_path_to_ignore("_local_tmp")
 
diff --git a/src/pystencils/codegen/config.py b/src/pystencils/codegen/config.py
index 91aff43f4..8e7e54ff1 100644
--- a/src/pystencils/codegen/config.py
+++ b/src/pystencils/codegen/config.py
@@ -395,10 +395,12 @@ class GpuOptions(ConfigBase):
     """
 
     @staticmethod
-    def default_warp_size(target: Target):
+    def default_warp_size(target: Target) -> int | None:
         match target:
             case Target.CUDA:
                 return 32
+            case Target.HIP:
+                return None
             case _:
                 raise NotImplementedError(
                     f"No default warp/wavefront size known for target {target}"
diff --git a/src/pystencils/codegen/driver.py b/src/pystencils/codegen/driver.py
index 9f59e3510..543d9db2f 100644
--- a/src/pystencils/codegen/driver.py
+++ b/src/pystencils/codegen/driver.py
@@ -410,6 +410,8 @@ class DefaultKernelCreationDriver:
         if warp_size is None:
             warp_size = GpuOptions.default_warp_size(self._target)
 
+        # TODO: Warn if warp_size is None and assume_warp_aligned_block_size is True
+
         return GpuIndexing(
             self._ctx,
             self._target,
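One possible shape for the TODO above, as a hedged sketch; the option access (`self._cfg.gpu.assume_warp_aligned_block_size`) is an assumption, not the driver's actual API:

    import warnings

    # Hypothetical sketch: warn when warp alignment was requested but no
    # warp size is known (e.g. Target.HIP, where it is device-dependent).
    if warp_size is None and self._cfg.gpu.assume_warp_aligned_block_size:
        warnings.warn(
            f"No warp size is known for target {self._target}; "
            "`assume_warp_aligned_block_size` cannot round block sizes "
            "to warp multiples."
        )
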
diff --git a/src/pystencils/codegen/gpu_indexing.py b/src/pystencils/codegen/gpu_indexing.py
index 0524eb0b1..b5e70043f 100644
--- a/src/pystencils/codegen/gpu_indexing.py
+++ b/src/pystencils/codegen/gpu_indexing.py
@@ -34,10 +34,7 @@ class HardwareProperties:
     max_threads_per_block: int
     max_block_sizes: dim3
 
-    def block_size_exceeds_hw_limits(
-            self,
-            block_size: tuple[int, ...]
-    ) -> bool:
+    def block_size_exceeds_hw_limits(self, block_size: tuple[int, ...]) -> bool:
         """Checks if provided block size conforms limits given by the hardware."""
 
         return (
@@ -106,8 +103,10 @@ class GpuLaunchConfiguration(ABC):
 
     @staticmethod
     def _excessive_block_size_error_msg(block_size: tuple[int, ...]):
-        return f"Unable to determine GPU block size for this kernel. \
-        Final block size was too large: {block_size}."
+        return (
+            "Unable to determine GPU block size for this kernel. "
+            f"Final block size was too large: {block_size}."
+        )
 
 
 class AutomaticLaunchConfiguration(GpuLaunchConfiguration):
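The reformatted message above also fixes a subtle bug: a backslash continuation inside a string literal keeps the following line's indentation as part of the string, while implicit concatenation of adjacent literals does not. A quick self-contained illustration:

    broken = f"Final block size was too large: {(1, 2, 3)}. \
            (the leading spaces end up inside the message)"

    fixed = (
        "Unable to determine GPU block size for this kernel. "
        f"Final block size was too large: {(1, 2, 3)}."
    )
    assert "    " in broken and "    " not in fixed
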
@@ -139,7 +138,9 @@ class AutomaticLaunchConfiguration(GpuLaunchConfiguration):
 
     @block_size.setter
     def block_size(self, val: dim3):
-        AttributeError("Setting `block_size` on an automatic launch configuration has no effect.")
+        raise AttributeError(
+            "Setting `block_size` on an automatic launch configuration has no effect."
+        )
 
     @property
     def parameters(self) -> frozenset[Parameter]:
@@ -297,7 +298,9 @@ class DynamicBlockSizeLaunchConfiguration(GpuLaunchConfiguration):
 
     @block_size.setter
     def block_size(self, val: dim3):
-        AttributeError("Setting `block_size` on an dynamic launch configuration has no effect.")
+        raise AttributeError(
+            "Setting `block_size` on a dynamic launch configuration has no effect."
+        )
 
     @staticmethod
     def _round_block_sizes_to_warp_size(
@@ -313,7 +316,7 @@ class DynamicBlockSizeLaunchConfiguration(GpuLaunchConfiguration):
             return (
                 *to_round[:index_to_round],
                 ceil_to_multiple(to_round[index_to_round], warp_size),
-                *to_round[index_to_round + 1:],
+                *to_round[index_to_round + 1 :],
             )
         else:
             return (
@@ -518,6 +521,8 @@ class GpuIndexing:
         match target:
             case Target.CUDA:
                 return (1024, 1024, 64)
+            case Target.HIP:
+                return (1024, 1024, 1024)
             case _:
                 raise CodegenError(
                     f"Cannot determine max GPU block sizes for target {target}"
@@ -526,7 +531,7 @@ class GpuIndexing:
     @staticmethod
     def get_max_threads_per_block(target: Target):
         match target:
-            case Target.CUDA:
+            case Target.CUDA | Target.HIP:
                 return 1024
             case _:
                 raise CodegenError(
@@ -606,8 +611,14 @@ class GpuIndexing:
         if self._assume_warp_aligned_block_size:
             warp_size = self._ast_factory.parse_index(self._hw_props.warp_size)
             rounded_block_size = self._ast_factory.parse_index(
-                PsIntDiv(work_items[0].clone() + warp_size.clone() - self._ast_factory.parse_index(1),
-                         warp_size.clone()) * warp_size.clone())
+                PsIntDiv(
+                    work_items[0].clone()
+                    + warp_size.clone()
+                    - self._ast_factory.parse_index(1),
+                    warp_size.clone(),
+                )
+                * warp_size.clone()
+            )
         else:
             rounded_block_size = work_items[0]
 
diff --git a/tests/kernelcreation/test_gpu.py b/tests/kernelcreation/test_gpu.py
index 944bd1241..bbe9aedd9 100644
--- a/tests/kernelcreation/test_gpu.py
+++ b/tests/kernelcreation/test_gpu.py
@@ -90,10 +90,32 @@ def test_indexing_options_3d(
 
     cp.testing.assert_allclose(dst_arr, expected)
 
-@pytest.mark.parametrize("iteration_space",
-                         [(8, 4, 4), (3, 8, 8), (3, 3, 16), (17, 3, 3), (3, 12, 56), (65, 65, 65), (3, 7, 9)])
-@pytest.mark.parametrize("initial_block_size",
-                         [(8, 4, 4), (3, 8, 8), (3, 3, 16), (2, 2, 64), (8, 2, 1), (3, 1, 32), (32, 1, 1), (1, 2, 3)])
+
+@pytest.mark.parametrize(
+    "iteration_space",
+    [
+        (8, 4, 4),
+        (1, 8, 8),
+        (1, 1, 16),
+        (17, 1, 1),
+        (1, 12, 56),
+        (65, 65, 65),
+        (1, 7, 9),
+    ],
+)
+@pytest.mark.parametrize(
+    "initial_block_size",
+    [
+        (8, 4, 4),
+        (1, 8, 8),
+        (1, 1, 16),
+        (2, 2, 64),
+        (8, 2, 1),
+        (3, 1, 32),
+        (32, 1, 1),
+        (1, 2, 3),
+    ],
+)
 @pytest.mark.parametrize("assume_warp_aligned_block_size", [True, False])
 @pytest.mark.parametrize("use_block_fitting", [True, False])
 def test_block_size_adaptations(
@@ -102,7 +124,13 @@ def test_block_size_adaptations(
     assume_warp_aligned_block_size: bool,
     use_block_fitting: bool,
 ):
-    src, dst = fields("src, dst: [3D]")
+    field_shape = tuple(2 + x for x in iteration_space[::-1])  # array index order, one ghost layer per side
+    src_arr = cp.ones(field_shape)
+    dst_arr = cp.zeros_like(src_arr)
+
+    src = Field.create_from_numpy_array("src", src_arr)
+    dst = Field.create_from_numpy_array("dst", dst_arr)
+
     asm = Assignment(
         dst.center(),
         src[-1, 0, 0]
@@ -113,25 +141,20 @@ def test_block_size_adaptations(
         + src[0, 0, 1],
     )
 
-    target = Target.CUDA
+    target = Target.CurrentGPU
     cfg = CreateKernelConfig(target=target)
     cfg.gpu.indexing_scheme = "linear3d"
     cfg.gpu.assume_warp_aligned_block_size = assume_warp_aligned_block_size
 
-    warp_size = cfg.gpu.default_warp_size(target)
-    max_threads_per_block = GpuIndexing.get_max_threads_per_block(target)
-    max_block_sizes = GpuIndexing.get_max_block_sizes(target)
+    warp_size = cfg.gpu.default_warp_size(cfg.get_target())
 
     ast = create_kernel(asm, cfg)
     kernel = ast.compile()
 
     if use_block_fitting:
-        # test internal block fitting function later used in `kernel.launch_config.fit_block_size`
-        internal_block_size = kernel.launch_config._fit_block_size_to_it_space(
-            iteration_space,
-            initial_block_size,
-            HardwareProperties(warp_size, max_threads_per_block, max_block_sizes),
-        )
+        # fit the block size via the public API and read back the evaluated result
+        kernel.launch_config.fit_block_size(initial_block_size)
+        internal_block_size, _ = kernel.launch_config.evaluate()
 
         # checks if criterion for warp size alignment is fulfilled
         def check_suitability(b):
@@ -139,25 +162,20 @@ def test_block_size_adaptations(
 
         # block size fitting should not modify an already ideal configuration
         # -> check if ideal configurations are modified
-        if (
-                check_suitability(initial_block_size)
-                and all(x == y for x, y in zip(initial_block_size, iteration_space))  # trimming may alter results
-        ):
-            assert all(x == y for x, y in zip(initial_block_size, internal_block_size)), \
-                f"Initial block size unnecessarily adapted from {initial_block_size} to {internal_block_size}."
-
-        assert check_suitability(internal_block_size), \
-            "Determined block size shall be divisible by warp size."
-
-        # set block size via fitting algorithm
-        kernel.launch_config.fit_block_size(initial_block_size)
+        if check_suitability(initial_block_size) and all(
+            x == y for x, y in zip(initial_block_size, iteration_space)  # trimming may alter results
+        ):
+            assert all(
+                x == y for x, y in zip(initial_block_size, internal_block_size)
+            ), f"Initial block size unnecessarily adapted from {initial_block_size} to {internal_block_size}."
+
+        assert check_suitability(
+            internal_block_size
+        ), "Determined block size must be divisible by the warp size."
     else:
         # set block size via trimming algorithm
         kernel.launch_config.trim_block_size(initial_block_size)
 
-    src_arr = cp.ones(iteration_space)
-    dst_arr = cp.zeros_like(src_arr)
-
     kernel(src=src_arr, dst=dst_arr)
 
     expected = cp.zeros_like(src_arr)
@@ -173,13 +191,7 @@ def test_indexing_options_2d(
     indexing_scheme: str, manual_grid: bool, assume_warp_aligned_block_size: bool
 ):
     src, dst = fields("src, dst: [2D]")
-    asm = Assignment(
-        dst.center(),
-        src[-1, 0]
-        + src[1, 0]
-        + src[0, -1]
-        + src[0, 1]
-    )
+    asm = Assignment(dst.center(), src[-1, 0] + src[1, 0] + src[0, -1] + src[0, 1])
 
     cfg = CreateKernelConfig(target=Target.CurrentGPU)
     cfg.gpu.indexing_scheme = indexing_scheme
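A note on the reworked setup in `test_block_size_adaptations`: the stencil reads the ±1 neighbors along every axis, so the arrays are allocated with one ghost layer per side, with the iteration space reversed into array index order. For example:

    iteration_space = (8, 4, 4)  # interior extents
    field_shape = tuple(2 + x for x in iteration_space[::-1])
    assert field_shape == (6, 6, 10)
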
-- 
GitLab