Commit 9e729903 authored by Frederik Hennig

update tests

parent 6a06d72f
Merge request !458: HIP Target and Platform
Pipeline #76608 failed
@@ -43,10 +43,16 @@ def add_path_to_ignore(path):
     ]
 
-collect_ignore = [
-    os.path.join(SCRIPT_FOLDER, "doc", "conf.py"),
-    os.path.join(SCRIPT_FOLDER, "src", "pystencils", "opencl", "opencl.autoinit"),
-]
+def ignore_file(fp):
+    global collect_ignore
+    collect_ignore += [os.path.join(SCRIPT_FOLDER, fp)]
+
+
+collect_ignore = []
+
+ignore_file("noxfile.py")
+ignore_file("docs/source/conf.py")
+add_path_to_ignore("docs/build")
 add_path_to_ignore("tests/benchmark")
 add_path_to_ignore("_local_tmp")
...
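For context: pytest picks up a module-level `collect_ignore` list from `conftest.py` and skips collecting any file listed there; the new `ignore_file` helper simply registers repository-relative paths in that list. A minimal standalone sketch of the same pattern (the ignored paths here are only illustrative):

import os

SCRIPT_FOLDER = os.path.dirname(os.path.abspath(__file__))

collect_ignore = []  # files listed here are skipped during pytest collection


def ignore_file(fp):
    # resolve the path against the repository root and register it for exclusion
    global collect_ignore
    collect_ignore += [os.path.join(SCRIPT_FOLDER, fp)]


ignore_file("noxfile.py")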
@@ -395,10 +395,12 @@ class GpuOptions(ConfigBase):
     """
 
     @staticmethod
-    def default_warp_size(target: Target):
+    def default_warp_size(target: Target) -> int | None:
         match target:
             case Target.CUDA:
                 return 32
+            case Target.HIP:
+                return None
             case _:
                 raise NotImplementedError(
                     f"No default warp/wavefront size known for target {target}"
...
@@ -410,6 +410,8 @@ class DefaultKernelCreationDriver:
 
         if warp_size is None:
             warp_size = GpuOptions.default_warp_size(self._target)
 
+        # TODO: Warn if warp_size is None and assume_warp_aligned_block_size is True
+
         return GpuIndexing(
             self._ctx,
             self._target,
...
@@ -34,10 +34,7 @@ class HardwareProperties:
     max_threads_per_block: int
     max_block_sizes: dim3
 
-    def block_size_exceeds_hw_limits(
-        self,
-        block_size: tuple[int, ...]
-    ) -> bool:
+    def block_size_exceeds_hw_limits(self, block_size: tuple[int, ...]) -> bool:
         """Checks if provided block size conforms limits given by the hardware."""
 
         return (
@@ -106,8 +103,10 @@ class GpuLaunchConfiguration(ABC):
 
     @staticmethod
     def _excessive_block_size_error_msg(block_size: tuple[int, ...]):
-        return f"Unable to determine GPU block size for this kernel. \
-            Final block size was too large: {block_size}."
+        return (
+            "Unable to determine GPU block size for this kernel. "
+            f"Final block size was too large: {block_size}."
+        )
 
 
 class AutomaticLaunchConfiguration(GpuLaunchConfiguration):
@@ -139,7 +138,9 @@ class AutomaticLaunchConfiguration(GpuLaunchConfiguration):
 
     @block_size.setter
     def block_size(self, val: dim3):
-        AttributeError("Setting `block_size` on an automatic launch configuration has no effect.")
+        AttributeError(
+            "Setting `block_size` on an automatic launch configuration has no effect."
+        )
 
     @property
     def parameters(self) -> frozenset[Parameter]:
@@ -297,7 +298,9 @@ class DynamicBlockSizeLaunchConfiguration(GpuLaunchConfiguration):
 
     @block_size.setter
     def block_size(self, val: dim3):
-        AttributeError("Setting `block_size` on an dynamic launch configuration has no effect.")
+        AttributeError(
+            "Setting `block_size` on an dynamic launch configuration has no effect."
+        )
 
     @staticmethod
     def _round_block_sizes_to_warp_size(
@@ -313,7 +316,7 @@ class DynamicBlockSizeLaunchConfiguration(GpuLaunchConfiguration):
             return (
                 *to_round[:index_to_round],
                 ceil_to_multiple(to_round[index_to_round], warp_size),
-                *to_round[index_to_round + 1:],
+                *to_round[index_to_round + 1 :],
             )
         else:
             return (
@@ -518,6 +521,8 @@ class GpuIndexing:
         match target:
             case Target.CUDA:
                 return (1024, 1024, 64)
+            case Target.HIP:
+                return (1024, 1024, 1024)
             case _:
                 raise CodegenError(
                     f"Cannot determine max GPU block sizes for target {target}"
@@ -526,7 +531,7 @@ class GpuIndexing:
 
     @staticmethod
     def get_max_threads_per_block(target: Target):
         match target:
-            case Target.CUDA:
+            case Target.CUDA | Target.HIP:
                 return 1024
             case _:
                 raise CodegenError(
@@ -606,8 +611,14 @@ class GpuIndexing:
             if self._assume_warp_aligned_block_size:
                 warp_size = self._ast_factory.parse_index(self._hw_props.warp_size)
                 rounded_block_size = self._ast_factory.parse_index(
-                    PsIntDiv(work_items[0].clone() + warp_size.clone() - self._ast_factory.parse_index(1),
-                             warp_size.clone()) * warp_size.clone())
+                    PsIntDiv(
+                        work_items[0].clone()
+                        + warp_size.clone()
+                        - self._ast_factory.parse_index(1),
+                        warp_size.clone(),
+                    )
+                    * warp_size.clone()
+                )
             else:
                 rounded_block_size = work_items[0]
...
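The restructured expression above still encodes the usual ceil-to-multiple rounding: the first work-item count is rounded up to the next multiple of the warp size using integer division. A plain-Python sketch of the same arithmetic (the function name is only illustrative):

def round_up_to_warp_multiple(work_items: int, warp_size: int) -> int:
    # ceil(work_items / warp_size) * warp_size, in pure integer arithmetic
    return ((work_items + warp_size - 1) // warp_size) * warp_size


assert round_up_to_warp_multiple(50, 32) == 64
assert round_up_to_warp_multiple(64, 32) == 64  # already aligned sizes stay unchanged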
@@ -90,10 +90,32 @@ def test_indexing_options_3d(
     cp.testing.assert_allclose(dst_arr, expected)
 
 
-@pytest.mark.parametrize("iteration_space",
-                         [(8, 4, 4), (3, 8, 8), (3, 3, 16), (17, 3, 3), (3, 12, 56), (65, 65, 65), (3, 7, 9)])
-@pytest.mark.parametrize("initial_block_size",
-                         [(8, 4, 4), (3, 8, 8), (3, 3, 16), (2, 2, 64), (8, 2, 1), (3, 1, 32), (32, 1, 1), (1, 2, 3)])
+@pytest.mark.parametrize(
+    "iteration_space",
+    [
+        (8, 4, 4),
+        (1, 8, 8),
+        (1, 1, 16),
+        (17, 1, 1),
+        (1, 12, 56),
+        (65, 65, 65),
+        (1, 7, 9),
+    ],
+)
+@pytest.mark.parametrize(
+    "initial_block_size",
+    [
+        (8, 4, 4),
+        (1, 8, 8),
+        (1, 1, 16),
+        (2, 2, 64),
+        (8, 2, 1),
+        (3, 1, 32),
+        (32, 1, 1),
+        (1, 2, 3),
+    ],
+)
 @pytest.mark.parametrize("assume_warp_aligned_block_size", [True, False])
 @pytest.mark.parametrize("use_block_fitting", [True, False])
 def test_block_size_adaptations(
@@ -102,7 +124,13 @@ def test_block_size_adaptations(
     assume_warp_aligned_block_size: bool,
     use_block_fitting: bool,
 ):
-    src, dst = fields("src, dst: [3D]")
+    field_shape = tuple(2 + x for x in iteration_space[::-1])
+    src_arr = cp.ones(field_shape)
+    dst_arr = cp.zeros_like(src_arr)
+
+    src = Field.create_from_numpy_array("src", src_arr)
+    dst = Field.create_from_numpy_array("dst", dst_arr)
+
     asm = Assignment(
         dst.center(),
         src[-1, 0, 0]
@@ -113,25 +141,20 @@ def test_block_size_adaptations(
         + src[0, 0, 1],
     )
 
-    target = Target.CUDA
+    target = Target.CurrentGPU
     cfg = CreateKernelConfig(target=target)
     cfg.gpu.indexing_scheme = "linear3d"
     cfg.gpu.assume_warp_aligned_block_size = assume_warp_aligned_block_size
-    warp_size = cfg.gpu.default_warp_size(target)
-    max_threads_per_block = GpuIndexing.get_max_threads_per_block(target)
-    max_block_sizes = GpuIndexing.get_max_block_sizes(target)
+    warp_size = cfg.gpu.default_warp_size(cfg.get_target())
 
     ast = create_kernel(asm, cfg)
     kernel = ast.compile()
 
     if use_block_fitting:
         # test internal block fitting function later used in `kernel.launch_config.fit_block_size`
-        internal_block_size = kernel.launch_config._fit_block_size_to_it_space(
-            iteration_space,
-            initial_block_size,
-            HardwareProperties(warp_size, max_threads_per_block, max_block_sizes),
-        )
+        kernel.launch_config.fit_block_size(initial_block_size)
+        internal_block_size, _ = kernel.launch_config.evaluate()
 
         # checks if criterion for warp size alignment is fulfilled
         def check_suitability(b):
@@ -139,25 +162,20 @@ def test_block_size_adaptations(
 
         # block size fitting should not modify an already ideal configuration
         # -> check if ideal configurations are modified
-        if (
-            check_suitability(initial_block_size)
-            and all(x == y for x, y in zip(initial_block_size, iteration_space))  # trimming may alter results
-        ):
-            assert all(x == y for x, y in zip(initial_block_size, internal_block_size)), \
-                f"Initial block size unnecessarily adapted from {initial_block_size} to {internal_block_size}."
+        if check_suitability(initial_block_size) and all(
+            x == y for x, y in zip(initial_block_size, iteration_space)
+        ):  # trimming may alter results
+            assert all(
+                x == y for x, y in zip(initial_block_size, internal_block_size)
+            ), f"Initial block size unnecessarily adapted from {initial_block_size} to {internal_block_size}."
 
-        assert check_suitability(internal_block_size), \
-            "Determined block size shall be divisible by warp size."
-
-        # set block size via fitting algorithm
-        kernel.launch_config.fit_block_size(initial_block_size)
+        assert check_suitability(
+            internal_block_size
+        ), "Determined block size shall be divisible by warp size."
     else:
         # set block size via trimming algorithm
         kernel.launch_config.trim_block_size(initial_block_size)
 
-    src_arr = cp.ones(iteration_space)
-    dst_arr = cp.zeros_like(src_arr)
-
     kernel(src=src_arr, dst=dst_arr)
 
     expected = cp.zeros_like(src_arr)
@@ -173,13 +191,7 @@ def test_indexing_options_2d(
     indexing_scheme: str, manual_grid: bool, assume_warp_aligned_block_size: bool
 ):
     src, dst = fields("src, dst: [2D]")
-    asm = Assignment(
-        dst.center(),
-        src[-1, 0]
-        + src[1, 0]
-        + src[0, -1]
-        + src[0, 1]
-    )
+    asm = Assignment(dst.center(), src[-1, 0] + src[1, 0] + src[0, -1] + src[0, 1])
 
     cfg = CreateKernelConfig(target=Target.CurrentGPU)
     cfg.gpu.indexing_scheme = indexing_scheme
...
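As the updated test shows, block sizes are now set through the launch configuration's public interface instead of the removed `_fit_block_size_to_it_space` helper. A hedged usage sketch mirroring the test (assuming `evaluate()` yields the final block size first, as the two-element unpacking in the test suggests; array shapes are illustrative):

import cupy as cp
from pystencils import Assignment, CreateKernelConfig, Field, Target, create_kernel

src_arr = cp.ones((18, 18, 18))
dst_arr = cp.zeros_like(src_arr)
src = Field.create_from_numpy_array("src", src_arr)
dst = Field.create_from_numpy_array("dst", dst_arr)

asm = Assignment(dst.center(), src[-1, 0, 0] + src[1, 0, 0])

cfg = CreateKernelConfig(target=Target.CurrentGPU)
cfg.gpu.indexing_scheme = "linear3d"

kernel = create_kernel(asm, cfg).compile()

# fit_block_size adapts the requested block size to the iteration space;
# trim_block_size (the test's other branch) only trims the given size instead.
kernel.launch_config.fit_block_size((8, 4, 4))
block_size, _ = kernel.launch_config.evaluate()

kernel(src=src_arr, dst=dst_arr)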