From 9e7299033af893db0fd38a0d64e88493fd536476 Mon Sep 17 00:00:00 2001
From: Frederik Hennig <frederik.hennig@fau.de>
Date: Tue, 18 Mar 2025 16:07:13 +0000
Subject: [PATCH] update tests

---
 conftest.py                            | 14 +++--
 src/pystencils/codegen/config.py       |  4 +-
 src/pystencils/codegen/driver.py       |  2 +
 src/pystencils/codegen/gpu_indexing.py | 35 +++++++----
 tests/kernelcreation/test_gpu.py       | 84 +++++++++++++++-----
 5 files changed, 86 insertions(+), 53 deletions(-)

diff --git a/conftest.py b/conftest.py
index ff0467eff..7ea8f5ba0 100644
--- a/conftest.py
+++ b/conftest.py
@@ -43,10 +43,16 @@ def add_path_to_ignore(path):
     ]
 
 
-collect_ignore = [
-    os.path.join(SCRIPT_FOLDER, "doc", "conf.py"),
-    os.path.join(SCRIPT_FOLDER, "src", "pystencils", "opencl", "opencl.autoinit"),
-]
+def ignore_file(fp):
+    global collect_ignore
+    collect_ignore += [os.path.join(SCRIPT_FOLDER, fp)]
+
+
+collect_ignore = []
+
+ignore_file("noxfile.py")
+ignore_file("docs/source/conf.py")
+add_path_to_ignore("docs/build")
 
 add_path_to_ignore("tests/benchmark")
 add_path_to_ignore("_local_tmp")
diff --git a/src/pystencils/codegen/config.py b/src/pystencils/codegen/config.py
index 91aff43f4..8e7e54ff1 100644
--- a/src/pystencils/codegen/config.py
+++ b/src/pystencils/codegen/config.py
@@ -395,10 +395,12 @@ class GpuOptions(ConfigBase):
     """
 
     @staticmethod
-    def default_warp_size(target: Target):
+    def default_warp_size(target: Target) -> int | None:
         match target:
             case Target.CUDA:
                 return 32
+            case Target.HIP:
+                return None
             case _:
                 raise NotImplementedError(
                     f"No default warp/wavefront size known for target {target}"
diff --git a/src/pystencils/codegen/driver.py b/src/pystencils/codegen/driver.py
index 9f59e3510..543d9db2f 100644
--- a/src/pystencils/codegen/driver.py
+++ b/src/pystencils/codegen/driver.py
@@ -410,6 +410,8 @@ class DefaultKernelCreationDriver:
         if warp_size is None:
             warp_size = GpuOptions.default_warp_size(self._target)
 
+        # TODO: Warn if warp_size is None and assume_warp_aligned_block_size is True
+
         return GpuIndexing(
             self._ctx,
             self._target,
diff --git a/src/pystencils/codegen/gpu_indexing.py b/src/pystencils/codegen/gpu_indexing.py
index 0524eb0b1..b5e70043f 100644
--- a/src/pystencils/codegen/gpu_indexing.py
+++ b/src/pystencils/codegen/gpu_indexing.py
@@ -34,10 +34,7 @@ class HardwareProperties:
     max_threads_per_block: int
     max_block_sizes: dim3
 
-    def block_size_exceeds_hw_limits(
-        self,
-        block_size: tuple[int, ...]
-    ) -> bool:
+    def block_size_exceeds_hw_limits(self, block_size: tuple[int, ...]) -> bool:
         """Checks if provided block size conforms limits given by the hardware."""
 
         return (
@@ -106,8 +103,10 @@ class GpuLaunchConfiguration(ABC):
 
     @staticmethod
    def _excessive_block_size_error_msg(block_size: tuple[int, ...]):
-        return f"Unable to determine GPU block size for this kernel. \
-            Final block size was too large: {block_size}."
+        return (
+            "Unable to determine GPU block size for this kernel. "
+            f"Final block size was too large: {block_size}."
+        )
 
 
 class AutomaticLaunchConfiguration(GpuLaunchConfiguration):
@@ -139,7 +138,9 @@ class AutomaticLaunchConfiguration(GpuLaunchConfiguration):
 
     @block_size.setter
     def block_size(self, val: dim3):
-        AttributeError("Setting `block_size` on an automatic launch configuration has no effect.")
+        AttributeError(
+            "Setting `block_size` on an automatic launch configuration has no effect."
+        )
 
     @property
     def parameters(self) -> frozenset[Parameter]:
@@ -297,7 +298,9 @@ class DynamicBlockSizeLaunchConfiguration(GpuLaunchConfiguration):
 
     @block_size.setter
     def block_size(self, val: dim3):
-        AttributeError("Setting `block_size` on an dynamic launch configuration has no effect.")
+        AttributeError(
+            "Setting `block_size` on an dynamic launch configuration has no effect."
+        )
 
     @staticmethod
     def _round_block_sizes_to_warp_size(
@@ -313,7 +316,7 @@ class DynamicBlockSizeLaunchConfiguration(GpuLaunchConfiguration):
             return (
                 *to_round[:index_to_round],
                 ceil_to_multiple(to_round[index_to_round], warp_size),
-                *to_round[index_to_round + 1:],
+                *to_round[index_to_round + 1 :],
             )
         else:
             return (
@@ -518,6 +521,8 @@ class GpuIndexing:
         match target:
             case Target.CUDA:
                 return (1024, 1024, 64)
+            case Target.HIP:
+                return (1024, 1024, 1024)
             case _:
                 raise CodegenError(
                     f"Cannot determine max GPU block sizes for target {target}"
@@ -526,7 +531,7 @@ class GpuIndexing:
     @staticmethod
     def get_max_threads_per_block(target: Target):
         match target:
-            case Target.CUDA:
+            case Target.CUDA | Target.HIP:
                 return 1024
             case _:
                 raise CodegenError(
@@ -606,8 +611,14 @@ class GpuIndexing:
         if self._assume_warp_aligned_block_size:
             warp_size = self._ast_factory.parse_index(self._hw_props.warp_size)
             rounded_block_size = self._ast_factory.parse_index(
-                PsIntDiv(work_items[0].clone() + warp_size.clone() - self._ast_factory.parse_index(1),
-                         warp_size.clone()) * warp_size.clone())
+                PsIntDiv(
+                    work_items[0].clone()
+                    + warp_size.clone()
+                    - self._ast_factory.parse_index(1),
+                    warp_size.clone(),
+                )
+                * warp_size.clone()
+            )
         else:
             rounded_block_size = work_items[0]
 
diff --git a/tests/kernelcreation/test_gpu.py b/tests/kernelcreation/test_gpu.py
index 944bd1241..bbe9aedd9 100644
--- a/tests/kernelcreation/test_gpu.py
+++ b/tests/kernelcreation/test_gpu.py
@@ -90,10 +90,32 @@ def test_indexing_options_3d(
     cp.testing.assert_allclose(dst_arr, expected)
 
 
-@pytest.mark.parametrize("iteration_space",
-                         [(8, 4, 4), (3, 8, 8), (3, 3, 16), (17, 3, 3), (3, 12, 56), (65, 65, 65), (3, 7, 9)])
-@pytest.mark.parametrize("initial_block_size",
-                         [(8, 4, 4), (3, 8, 8), (3, 3, 16), (2, 2, 64), (8, 2, 1), (3, 1, 32), (32, 1, 1), (1, 2, 3)])
+
+@pytest.mark.parametrize(
+    "iteration_space",
+    [
+        (8, 4, 4),
+        (1, 8, 8),
+        (1, 1, 16),
+        (17, 1, 1),
+        (1, 12, 56),
+        (65, 65, 65),
+        (1, 7, 9),
+    ],
+)
+@pytest.mark.parametrize(
+    "initial_block_size",
+    [
+        (8, 4, 4),
+        (1, 8, 8),
+        (1, 1, 16),
+        (2, 2, 64),
+        (8, 2, 1),
+        (3, 1, 32),
+        (32, 1, 1),
+        (1, 2, 3),
+    ],
+)
 @pytest.mark.parametrize("assume_warp_aligned_block_size", [True, False])
 @pytest.mark.parametrize("use_block_fitting", [True, False])
 def test_block_size_adaptations(
@@ -102,7 +124,13 @@ def test_block_size_adaptations(
     assume_warp_aligned_block_size: bool,
     use_block_fitting: bool,
 ):
-    src, dst = fields("src, dst: [3D]")
+    field_shape = tuple(2 + x for x in iteration_space[::-1])
+    src_arr = cp.ones(field_shape)
+    dst_arr = cp.zeros_like(src_arr)
+
+    src = Field.create_from_numpy_array("src", src_arr)
+    dst = Field.create_from_numpy_array("dst", dst_arr)
+
     asm = Assignment(
         dst.center(),
         src[-1, 0, 0]
@@ -113,25 +141,20 @@ def test_block_size_adaptations(
         + src[0, 0, 1],
     )
 
-    target = Target.CUDA
+    target = Target.CurrentGPU
     cfg = CreateKernelConfig(target=target)
     cfg.gpu.indexing_scheme = "linear3d"
     cfg.gpu.assume_warp_aligned_block_size = assume_warp_aligned_block_size
 
-    warp_size = cfg.gpu.default_warp_size(target)
-    max_threads_per_block = GpuIndexing.get_max_threads_per_block(target)
-    max_block_sizes = GpuIndexing.get_max_block_sizes(target)
+    warp_size = cfg.gpu.default_warp_size(cfg.get_target())
 
     ast = create_kernel(asm, cfg)
     kernel = ast.compile()
 
     if use_block_fitting:
         # test internal block fitting function later used in `kernel.launch_config.fit_block_size`
-        internal_block_size = kernel.launch_config._fit_block_size_to_it_space(
-            iteration_space,
-            initial_block_size,
-            HardwareProperties(warp_size, max_threads_per_block, max_block_sizes),
-        )
+        kernel.launch_config.fit_block_size(initial_block_size)
+        internal_block_size, _ = kernel.launch_config.evaluate()
 
         # checks if criterion for warp size alignment is fulfilled
         def check_suitability(b):
@@ -139,25 +162,20 @@ def test_block_size_adaptations(
 
         # block size fitting should not modify an already ideal configuration
         # -> check if ideal configurations are modified
-        if (
-            check_suitability(initial_block_size)
-            and all(x == y for x, y in zip(initial_block_size, iteration_space))  # trimming may alter results
-        ):
-            assert all(x == y for x, y in zip(initial_block_size, internal_block_size)), \
-                f"Initial block size unnecessarily adapted from {initial_block_size} to {internal_block_size}."
-
-        assert check_suitability(internal_block_size), \
-            "Determined block size shall be divisible by warp size."
-
-        # set block size via fitting algorithm
-        kernel.launch_config.fit_block_size(initial_block_size)
+        if check_suitability(initial_block_size) and all(
+            x == y for x, y in zip(initial_block_size, iteration_space)
+        ):  # trimming may alter results
+            assert all(
+                x == y for x, y in zip(initial_block_size, internal_block_size)
+            ), f"Initial block size unnecessarily adapted from {initial_block_size} to {internal_block_size}."
+
+        assert check_suitability(
+            internal_block_size
+        ), "Determined block size shall be divisible by warp size."
     else:
         # set block size via trimming algorithm
         kernel.launch_config.trim_block_size(initial_block_size)
 
-    src_arr = cp.ones(iteration_space)
-    dst_arr = cp.zeros_like(src_arr)
-
     kernel(src=src_arr, dst=dst_arr)
 
     expected = cp.zeros_like(src_arr)
@@ -173,13 +191,7 @@ def test_indexing_options_2d(
     indexing_scheme: str, manual_grid: bool, assume_warp_aligned_block_size: bool
 ):
     src, dst = fields("src, dst: [2D]")
-    asm = Assignment(
-        dst.center(),
-        src[-1, 0]
-        + src[1, 0]
-        + src[0, -1]
-        + src[0, 1]
-    )
+    asm = Assignment(dst.center(), src[-1, 0] + src[1, 0] + src[0, -1] + src[0, 1])
 
     cfg = CreateKernelConfig(target=Target.CurrentGPU)
     cfg.gpu.indexing_scheme = indexing_scheme
--
GitLab
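
Usage sketch (illustration only, not part of the patch above): the rewritten test_block_size_adaptations now drives the public launch-configuration API (fit_block_size, trim_block_size, evaluate) on Target.CurrentGPU instead of the internal _fit_block_size_to_it_space helper. The snippet below mirrors those calls end to end; the field shape, the reduced two-point stencil, and the example block size are assumptions chosen for brevity, while all API calls are taken verbatim from the diff.

import cupy as cp

from pystencils import Assignment, CreateKernelConfig, Field, Target, create_kernel

# Device-resident fields, created the same way as in the updated test
src_arr = cp.ones((10, 10, 10))          # shape is an assumption for this sketch
dst_arr = cp.zeros_like(src_arr)
src = Field.create_from_numpy_array("src", src_arr)
dst = Field.create_from_numpy_array("dst", dst_arr)

# Reduced stencil (assumption; the test uses the full six-point 3D stencil)
asm = Assignment(dst.center(), src[-1, 0, 0] + src[1, 0, 0])

cfg = CreateKernelConfig(target=Target.CurrentGPU)
cfg.gpu.indexing_scheme = "linear3d"
cfg.gpu.assume_warp_aligned_block_size = True

kernel = create_kernel(asm, cfg).compile()

# Fit the block size to the iteration space, then read back the block size
# actually chosen, exactly as the updated test does via evaluate()
kernel.launch_config.fit_block_size((8, 4, 4))
block_size, _ = kernel.launch_config.evaluate()

kernel(src=src_arr, dst=dst_arr)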