Skip to content
Snippets Groups Projects
Commit 72793960 authored by Frederik Hennig
Browse files

add all-round test case for indexing options

parent c0f48e19
No related branches found
No related tags found
1 merge request: !449 "GPU Indexing Schemes and Launch Configurations"
...@@ -64,6 +64,7 @@ exclude_lines = ...@@ -64,6 +64,7 @@ exclude_lines =
if 0: if 0:
if False: if False:
if __name__ == .__main__.: if __name__ == .__main__.:
assert False
# Don't cover type checking imports # Don't cover type checking imports
if TYPE_CHECKING: if TYPE_CHECKING:
......
...@@ -32,14 +32,7 @@ def inspect_dp_kernel(kernel: Kernel, gen_config: CreateKernelConfig): ...@@ -32,14 +32,7 @@ def inspect_dp_kernel(kernel: Kernel, gen_config: CreateKernelConfig):
assert "_mm512_storeu_pd" in code assert "_mm512_storeu_pd" in code
def test_filter_kernel(gen_config): def test_filter_kernel(gen_config, xp):
if gen_config.target == Target.CUDA:
import cupy as cp
xp = cp
else:
xp = np
weight = sp.Symbol("weight") weight = sp.Symbol("weight")
stencil = [[1, 1, 1], [1, 1, 1], [1, 1, 1]] stencil = [[1, 1, 1], [1, 1, 1], [1, 1, 1]]
...@@ -62,14 +55,7 @@ def test_filter_kernel(gen_config): ...@@ -62,14 +55,7 @@ def test_filter_kernel(gen_config):
xp.testing.assert_allclose(dst_arr, expected) xp.testing.assert_allclose(dst_arr, expected)
def test_filter_kernel_fixedsize(gen_config): def test_filter_kernel_fixedsize(gen_config, xp):
if gen_config.target == Target.CUDA:
import cupy as cp
xp = cp
else:
xp = np
weight = sp.Symbol("weight") weight = sp.Symbol("weight")
stencil = [[1, 1, 1], [1, 1, 1], [1, 1, 1]] stencil = [[1, 1, 1], [1, 1, 1], [1, 1, 1]]
......
...@@ -11,10 +11,9 @@ from pystencils import ( ...@@ -11,10 +11,9 @@ from pystencils import (
CreateKernelConfig, CreateKernelConfig,
create_kernel, create_kernel,
Target, Target,
assignment_from_stencil,
) )
# from pystencils.gpu import BlockIndexing
from pystencils.simp import sympy_cse_on_assignment_list
from pystencils.slicing import ( from pystencils.slicing import (
add_ghost_layers, add_ghost_layers,
make_slice, make_slice,
...@@ -30,6 +29,58 @@ except ImportError: ...@@ -30,6 +29,58 @@ except ImportError:
pytest.skip(reason="CuPy is not available", allow_module_level=True) pytest.skip(reason="CuPy is not available", allow_module_level=True)
@pytest.mark.parametrize("indexing_scheme", ["linear3d", "blockwise4d"])
@pytest.mark.parametrize("omit_range_check", [False, True])
@pytest.mark.parametrize("manual_grid", [False, True])
def test_indexing_options(
    indexing_scheme: str, omit_range_check: bool, manual_grid: bool
):
    """Check a six-neighbour sum kernel for every combination of GPU indexing options.

    The kernel writes, to each cell of ``dst``, the sum of the six
    axis-aligned neighbours of the same cell in ``src``. The source array is
    filled with ones, so the test expects 6.0 everywhere except in the
    outermost layer of cells, which is expected to stay at zero.

    Args:
        indexing_scheme: Value assigned to ``cfg.gpu.indexing_scheme``.
        omit_range_check: Value assigned to ``cfg.gpu.omit_range_check``.
        manual_grid: Value assigned to ``cfg.gpu.manual_launch_grid``; when
            true, the test also sets block and grid sizes by hand.
    """
    src, dst = fields("src, dst: [3D]")

    # Sum over the two direct neighbours along each of the three axes.
    neighbour_sum = (
        src[-1, 0, 0]
        + src[1, 0, 0]
        + src[0, -1, 0]
        + src[0, 1, 0]
        + src[0, 0, -1]
        + src[0, 0, 1]
    )
    update = Assignment(dst.center(), neighbour_sum)

    config = CreateKernelConfig(target=Target.CUDA)
    config.gpu.indexing_scheme = indexing_scheme
    config.gpu.omit_range_check = omit_range_check
    config.gpu.manual_launch_grid = manual_grid

    kernel = create_kernel(update, config).compile()

    if indexing_scheme == "linear3d":
        # Set in both the manual and the automatic case; the block size
        # must fit the array shape (without ghost layers).
        kernel.launch_config.block_size = (10, 8, 8)
        if manual_grid:
            kernel.launch_config.grid_size = (4, 4, 2)
    elif manual_grid and indexing_scheme == "blockwise4d":
        kernel.launch_config.block_size = (40, 1, 1)
        kernel.launch_config.grid_size = (32, 16, 1)

    input_arr = cp.ones((18, 34, 42))
    output_arr = cp.zeros_like(input_arr)

    kernel(src=input_arr, dst=output_arr)

    # Only cells one layer in from every face are expected to be written.
    reference = cp.zeros_like(input_arr)
    reference[1:-1, 1:-1, 1:-1] = 6.0

    cp.testing.assert_allclose(output_arr, reference)
def test_averaging_kernel(): def test_averaging_kernel():
size = (40, 55) size = (40, 55)
src_arr = np.random.rand(*size) src_arr = np.random.rand(*size)
...@@ -44,7 +95,7 @@ def test_averaging_kernel(): ...@@ -44,7 +95,7 @@ def test_averaging_kernel():
) )
config = CreateKernelConfig(target=Target.GPU) config = CreateKernelConfig(target=Target.GPU)
ast = create_kernel(sympy_cse_on_assignment_list([update_rule]), config=config) ast = create_kernel(update_rule, config=config)
kernel = ast.compile() kernel = ast.compile()
gpu_src_arr = cp.asarray(src_arr) gpu_src_arr = cp.asarray(src_arr)
...@@ -70,7 +121,7 @@ def test_variable_sized_fields(): ...@@ -70,7 +121,7 @@ def test_variable_sized_fields():
) )
config = CreateKernelConfig(target=Target.GPU) config = CreateKernelConfig(target=Target.GPU)
ast = create_kernel(sympy_cse_on_assignment_list([update_rule]), config=config) ast = create_kernel(update_rule, config=config)
kernel = ast.compile() kernel = ast.compile()
size = (3, 3) size = (3, 3)
...@@ -142,7 +193,7 @@ def test_ghost_layer(): ...@@ -142,7 +193,7 @@ def test_ghost_layer():
config.ghost_layers = ghost_layers config.ghost_layers = ghost_layers
config.gpu.indexing_scheme = "blockwise4d" config.gpu.indexing_scheme = "blockwise4d"
ast = create_kernel(sympy_cse_on_assignment_list([update_rule]), config=config) ast = create_kernel(update_rule, config=config)
kernel = ast.compile() kernel = ast.compile()
gpu_src_arr = cp.asarray(src_arr) gpu_src_arr = cp.asarray(src_arr)
...@@ -170,8 +221,8 @@ def test_setting_value(): ...@@ -170,8 +221,8 @@ def test_setting_value():
config.target = Target.CUDA config.target = Target.CUDA
config.iteration_slice = iteration_slice config.iteration_slice = iteration_slice
config.gpu.indexing_scheme = "blockwise4d" config.gpu.indexing_scheme = "blockwise4d"
ast = create_kernel(sympy_cse_on_assignment_list(update_rule), config=config) ast = create_kernel(update_rule, config=config)
kernel = ast.compile() kernel = ast.compile()
kernel(f=arr_gpu, value=np.float64(42.0)) kernel(f=arr_gpu, value=np.float64(42.0))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment