Commit 9e729903 authored by Frederik Hennig

update tests

parent 6a06d72f
Merge request !458: HIP Target and Platform
Pipeline #76608 failed
@@ -43,10 +43,16 @@ def add_path_to_ignore(path):
     ]
 
-collect_ignore = [
-    os.path.join(SCRIPT_FOLDER, "doc", "conf.py"),
-    os.path.join(SCRIPT_FOLDER, "src", "pystencils", "opencl", "opencl.autoinit"),
-]
+def ignore_file(fp):
+    global collect_ignore
+    collect_ignore += [os.path.join(SCRIPT_FOLDER, fp)]
+
+
+collect_ignore = []
+
+ignore_file("noxfile.py")
+ignore_file("docs/source/conf.py")
+add_path_to_ignore("docs/build")
 add_path_to_ignore("tests/benchmark")
 add_path_to_ignore("_local_tmp")
...
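For context: pytest picks up a module-level `collect_ignore` list from `conftest.py` and skips collecting any file listed there; the new `ignore_file` helper simply registers repository-relative paths in that list. A minimal standalone sketch of the same pattern (the ignored paths here are only illustrative):

import os

SCRIPT_FOLDER = os.path.dirname(os.path.abspath(__file__))

collect_ignore = []  # files listed here are skipped during pytest collection


def ignore_file(fp):
    # resolve the path against the repository root and register it for exclusion
    global collect_ignore
    collect_ignore += [os.path.join(SCRIPT_FOLDER, fp)]


ignore_file("noxfile.py")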
@@ -395,10 +395,12 @@ class GpuOptions(ConfigBase):
     """
 
     @staticmethod
-    def default_warp_size(target: Target):
+    def default_warp_size(target: Target) -> int | None:
         match target:
             case Target.CUDA:
                 return 32
+            case Target.HIP:
+                return None
             case _:
                 raise NotImplementedError(
                     f"No default warp/wavefront size known for target {target}"
...
@@ -410,6 +410,8 @@ class DefaultKernelCreationDriver:
 
         if warp_size is None:
             warp_size = GpuOptions.default_warp_size(self._target)
 
+        # TODO: Warn if warp_size is None and assume_warp_aligned_block_size is True
+
         return GpuIndexing(
             self._ctx,
             self._target,
...
@@ -34,10 +34,7 @@ class HardwareProperties:
     max_threads_per_block: int
     max_block_sizes: dim3
 
-    def block_size_exceeds_hw_limits(
-        self,
-        block_size: tuple[int, ...]
-    ) -> bool:
+    def block_size_exceeds_hw_limits(self, block_size: tuple[int, ...]) -> bool:
         """Checks if provided block size conforms limits given by the hardware."""
 
         return (
@@ -106,8 +103,10 @@ class GpuLaunchConfiguration(ABC):
 
     @staticmethod
     def _excessive_block_size_error_msg(block_size: tuple[int, ...]):
-        return f"Unable to determine GPU block size for this kernel. \
-            Final block size was too large: {block_size}."
+        return (
+            "Unable to determine GPU block size for this kernel. "
+            f"Final block size was too large: {block_size}."
+        )
 
 
 class AutomaticLaunchConfiguration(GpuLaunchConfiguration):
@@ -139,7 +138,9 @@ class AutomaticLaunchConfiguration(GpuLaunchConfiguration):
 
     @block_size.setter
     def block_size(self, val: dim3):
-        AttributeError("Setting `block_size` on an automatic launch configuration has no effect.")
+        AttributeError(
+            "Setting `block_size` on an automatic launch configuration has no effect."
+        )
 
     @property
     def parameters(self) -> frozenset[Parameter]:
@@ -297,7 +298,9 @@ class DynamicBlockSizeLaunchConfiguration(GpuLaunchConfiguration):
 
     @block_size.setter
     def block_size(self, val: dim3):
-        AttributeError("Setting `block_size` on an dynamic launch configuration has no effect.")
+        AttributeError(
+            "Setting `block_size` on an dynamic launch configuration has no effect."
+        )
 
     @staticmethod
     def _round_block_sizes_to_warp_size(
@@ -313,7 +316,7 @@ class DynamicBlockSizeLaunchConfiguration(GpuLaunchConfiguration):
             return (
                 *to_round[:index_to_round],
                 ceil_to_multiple(to_round[index_to_round], warp_size),
-                *to_round[index_to_round + 1:],
+                *to_round[index_to_round + 1 :],
             )
         else:
             return (
@@ -518,6 +521,8 @@ class GpuIndexing:
         match target:
             case Target.CUDA:
                 return (1024, 1024, 64)
+            case Target.HIP:
+                return (1024, 1024, 1024)
             case _:
                 raise CodegenError(
                     f"Cannot determine max GPU block sizes for target {target}"
@@ -526,7 +531,7 @@ class GpuIndexing:
 
     @staticmethod
     def get_max_threads_per_block(target: Target):
         match target:
-            case Target.CUDA:
+            case Target.CUDA | Target.HIP:
                 return 1024
             case _:
                 raise CodegenError(
@@ -606,8 +611,14 @@ class GpuIndexing:
             if self._assume_warp_aligned_block_size:
                 warp_size = self._ast_factory.parse_index(self._hw_props.warp_size)
                 rounded_block_size = self._ast_factory.parse_index(
-                    PsIntDiv(work_items[0].clone() + warp_size.clone() - self._ast_factory.parse_index(1),
-                             warp_size.clone()) * warp_size.clone())
+                    PsIntDiv(
+                        work_items[0].clone()
+                        + warp_size.clone()
+                        - self._ast_factory.parse_index(1),
+                        warp_size.clone(),
+                    )
+                    * warp_size.clone()
+                )
             else:
                 rounded_block_size = work_items[0]
...
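The restructured expression above still encodes the usual ceil-to-multiple rounding: the first work-item count is rounded up to the next multiple of the warp size using integer division. A plain-Python sketch of the same arithmetic (the function name is only illustrative):

def round_up_to_warp_multiple(work_items: int, warp_size: int) -> int:
    # ceil(work_items / warp_size) * warp_size, in pure integer arithmetic
    return ((work_items + warp_size - 1) // warp_size) * warp_size


assert round_up_to_warp_multiple(50, 32) == 64
assert round_up_to_warp_multiple(64, 32) == 64  # already aligned sizes stay unchanged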
@@ -90,10 +90,32 @@ def test_indexing_options_3d(
     cp.testing.assert_allclose(dst_arr, expected)
 
 
-@pytest.mark.parametrize("iteration_space",
-                         [(8, 4, 4), (3, 8, 8), (3, 3, 16), (17, 3, 3), (3, 12, 56), (65, 65, 65), (3, 7, 9)])
-@pytest.mark.parametrize("initial_block_size",
-                         [(8, 4, 4), (3, 8, 8), (3, 3, 16), (2, 2, 64), (8, 2, 1), (3, 1, 32), (32, 1, 1), (1, 2, 3)])
+@pytest.mark.parametrize(
+    "iteration_space",
+    [
+        (8, 4, 4),
+        (1, 8, 8),
+        (1, 1, 16),
+        (17, 1, 1),
+        (1, 12, 56),
+        (65, 65, 65),
+        (1, 7, 9),
+    ],
+)
+@pytest.mark.parametrize(
+    "initial_block_size",
+    [
+        (8, 4, 4),
+        (1, 8, 8),
+        (1, 1, 16),
+        (2, 2, 64),
+        (8, 2, 1),
+        (3, 1, 32),
+        (32, 1, 1),
+        (1, 2, 3),
+    ],
+)
 @pytest.mark.parametrize("assume_warp_aligned_block_size", [True, False])
 @pytest.mark.parametrize("use_block_fitting", [True, False])
 def test_block_size_adaptations(
@@ -102,7 +124,13 @@ def test_block_size_adaptations(
     assume_warp_aligned_block_size: bool,
     use_block_fitting: bool,
 ):
-    src, dst = fields("src, dst: [3D]")
+    field_shape = tuple(2 + x for x in iteration_space[::-1])
+    src_arr = cp.ones(field_shape)
+    dst_arr = cp.zeros_like(src_arr)
+
+    src = Field.create_from_numpy_array("src", src_arr)
+    dst = Field.create_from_numpy_array("dst", dst_arr)
+
     asm = Assignment(
         dst.center(),
         src[-1, 0, 0]
@@ -113,25 +141,20 @@ def test_block_size_adaptations(
         + src[0, 0, 1],
     )
 
-    target = Target.CUDA
+    target = Target.CurrentGPU
     cfg = CreateKernelConfig(target=target)
     cfg.gpu.indexing_scheme = "linear3d"
     cfg.gpu.assume_warp_aligned_block_size = assume_warp_aligned_block_size
-    warp_size = cfg.gpu.default_warp_size(target)
-    max_threads_per_block = GpuIndexing.get_max_threads_per_block(target)
-    max_block_sizes = GpuIndexing.get_max_block_sizes(target)
+    warp_size = cfg.gpu.default_warp_size(cfg.get_target())
 
     ast = create_kernel(asm, cfg)
     kernel = ast.compile()
 
     if use_block_fitting:
         # test internal block fitting function later used in `kernel.launch_config.fit_block_size`
-        internal_block_size = kernel.launch_config._fit_block_size_to_it_space(
-            iteration_space,
-            initial_block_size,
-            HardwareProperties(warp_size, max_threads_per_block, max_block_sizes),
-        )
+        kernel.launch_config.fit_block_size(initial_block_size)
+        internal_block_size, _ = kernel.launch_config.evaluate()
 
         # checks if criterion for warp size alignment is fulfilled
         def check_suitability(b):
@@ -139,25 +162,20 @@ def test_block_size_adaptations(
 
         # block size fitting should not modify an already ideal configuration
         # -> check if ideal configurations are modified
-        if (
-            check_suitability(initial_block_size)
-            and all(x == y for x, y in zip(initial_block_size, iteration_space))  # trimming may alter results
-        ):
-            assert all(x == y for x, y in zip(initial_block_size, internal_block_size)), \
-                f"Initial block size unnecessarily adapted from {initial_block_size} to {internal_block_size}."
+        if check_suitability(initial_block_size) and all(
+            x == y for x, y in zip(initial_block_size, iteration_space)
+        ):  # trimming may alter results
+            assert all(
+                x == y for x, y in zip(initial_block_size, internal_block_size)
+            ), f"Initial block size unnecessarily adapted from {initial_block_size} to {internal_block_size}."
 
-        assert check_suitability(internal_block_size), \
-            "Determined block size shall be divisible by warp size."
-
-        # set block size via fitting algorithm
-        kernel.launch_config.fit_block_size(initial_block_size)
+        assert check_suitability(
+            internal_block_size
+        ), "Determined block size shall be divisible by warp size."
     else:
         # set block size via trimming algorithm
         kernel.launch_config.trim_block_size(initial_block_size)
 
-    src_arr = cp.ones(iteration_space)
-    dst_arr = cp.zeros_like(src_arr)
-
     kernel(src=src_arr, dst=dst_arr)
 
     expected = cp.zeros_like(src_arr)
@@ -173,13 +191,7 @@ def test_indexing_options_2d(
     indexing_scheme: str, manual_grid: bool, assume_warp_aligned_block_size: bool
 ):
     src, dst = fields("src, dst: [2D]")
-    asm = Assignment(
-        dst.center(),
-        src[-1, 0]
-        + src[1, 0]
-        + src[0, -1]
-        + src[0, 1]
-    )
+    asm = Assignment(dst.center(), src[-1, 0] + src[1, 0] + src[0, -1] + src[0, 1])
 
     cfg = CreateKernelConfig(target=Target.CurrentGPU)
     cfg.gpu.indexing_scheme = indexing_scheme
...
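As the updated test shows, block sizes are now set through the launch configuration's public interface instead of the removed `_fit_block_size_to_it_space` helper. A hedged usage sketch mirroring the test (assuming `evaluate()` yields the final block size first, as the two-element unpacking in the test suggests; array shapes are illustrative):

import cupy as cp
from pystencils import Assignment, CreateKernelConfig, Field, Target, create_kernel

src_arr = cp.ones((18, 18, 18))
dst_arr = cp.zeros_like(src_arr)
src = Field.create_from_numpy_array("src", src_arr)
dst = Field.create_from_numpy_array("dst", dst_arr)

asm = Assignment(dst.center(), src[-1, 0, 0] + src[1, 0, 0])

cfg = CreateKernelConfig(target=Target.CurrentGPU)
cfg.gpu.indexing_scheme = "linear3d"

kernel = create_kernel(asm, cfg).compile()

# fit_block_size adapts the requested block size to the iteration space;
# trim_block_size (the test's other branch) only trims the given size instead.
kernel.launch_config.fit_block_size((8, 4, 4))
block_size, _ = kernel.launch_config.evaluate()

kernel(src=src_arr, dst=dst_arr)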