diff --git a/pytest.ini b/pytest.ini
index 707a43b4548e99e8e6862e1b48a1844e4318b55e..744a74bc781b3e03568e3c3a67cefbe9395bd713 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -64,6 +64,7 @@ exclude_lines =
        if 0:
        if False:
        if __name__ == .__main__.:
+       assert False
 
        # Don't cover type checking imports
        if TYPE_CHECKING:
diff --git a/tests/kernelcreation/test_domain_kernels.py b/tests/kernelcreation/test_domain_kernels.py
index da261faec49940df31d59f44651956e2012b113a..0d71dbe1a250c865c0f637aa3a125837abfe39e7 100644
--- a/tests/kernelcreation/test_domain_kernels.py
+++ b/tests/kernelcreation/test_domain_kernels.py
@@ -32,14 +32,7 @@ def inspect_dp_kernel(kernel: Kernel, gen_config: CreateKernelConfig):
             assert "_mm512_storeu_pd" in code
 
 
-def test_filter_kernel(gen_config):
-    if gen_config.target == Target.CUDA:
-        import cupy as cp
-
-        xp = cp
-    else:
-        xp = np
-
+def test_filter_kernel(gen_config, xp):
     weight = sp.Symbol("weight")
     stencil = [[1, 1, 1], [1, 1, 1], [1, 1, 1]]
 
@@ -62,14 +55,7 @@ def test_filter_kernel(gen_config):
     xp.testing.assert_allclose(dst_arr, expected)
 
 
-def test_filter_kernel_fixedsize(gen_config):
-    if gen_config.target == Target.CUDA:
-        import cupy as cp
-
-        xp = cp
-    else:
-        xp = np
-
+def test_filter_kernel_fixedsize(gen_config, xp):
     weight = sp.Symbol("weight")
     stencil = [[1, 1, 1], [1, 1, 1], [1, 1, 1]]
 
diff --git a/tests/kernelcreation/test_gpu.py b/tests/kernelcreation/test_gpu.py
index 621e4c2514736f2dc2bb9b163931cba4af63bddc..d80647fb6a620182c008a9cb9a65adb8f16b0b4e 100644
--- a/tests/kernelcreation/test_gpu.py
+++ b/tests/kernelcreation/test_gpu.py
@@ -11,10 +11,9 @@ from pystencils import (
     CreateKernelConfig,
     create_kernel,
     Target,
+    assignment_from_stencil,
 )
 
-# from pystencils.gpu import BlockIndexing
-from pystencils.simp import sympy_cse_on_assignment_list
 from pystencils.slicing import (
     add_ghost_layers,
     make_slice,
@@ -30,6 +29,58 @@ except ImportError:
     pytest.skip(reason="CuPy is not available", allow_module_level=True)
 
 
+@pytest.mark.parametrize("indexing_scheme", ["linear3d", "blockwise4d"])
+@pytest.mark.parametrize("omit_range_check", [False, True])
+@pytest.mark.parametrize("manual_grid", [False, True])
+def test_indexing_options(
+    indexing_scheme: str, omit_range_check: bool, manual_grid: bool
+):
+    src, dst = fields("src, dst: [3D]")
+    asm = Assignment(
+        dst.center(),
+        src[-1, 0, 0]
+        + src[1, 0, 0]
+        + src[0, -1, 0]
+        + src[0, 1, 0]
+        + src[0, 0, -1]
+        + src[0, 0, 1],
+    )
+
+    cfg = CreateKernelConfig(target=Target.CUDA)
+    cfg.gpu.indexing_scheme = indexing_scheme
+    cfg.gpu.omit_range_check = omit_range_check
+    cfg.gpu.manual_launch_grid = manual_grid
+
+    ast = create_kernel(asm, cfg)
+    kernel = ast.compile()
+
+    src_arr = cp.ones((18, 34, 42))
+    dst_arr = cp.zeros_like(src_arr)
+
+    if manual_grid:
+        match indexing_scheme:
+            case "linear3d":
+                kernel.launch_config.block_size = (10, 8, 8)
+                kernel.launch_config.grid_size = (4, 4, 2)
+            case "blockwise4d":
+                kernel.launch_config.block_size = (40, 1, 1)
+                kernel.launch_config.grid_size = (32, 16, 1)
+
+    elif indexing_scheme == "linear3d":
+        kernel.launch_config.block_size = (
+            10,
+            8,
+            8,
+        )  # must fit the src_arr shape (without ghost layers)
+
+    kernel(src=src_arr, dst=dst_arr)
+
+    expected = cp.zeros_like(src_arr)
+    expected[1:-1, 1:-1, 1:-1].fill(6.0)
+
+    cp.testing.assert_allclose(dst_arr, expected)
+
+
 def test_averaging_kernel():
     size = (40, 55)
     src_arr = np.random.rand(*size)
@@ -44,7 +95,7 @@ def test_averaging_kernel():
     )
 
     config = CreateKernelConfig(target=Target.GPU)
-    ast = create_kernel(sympy_cse_on_assignment_list([update_rule]), config=config)
+    ast = create_kernel(update_rule, config=config)
     kernel = ast.compile()
 
     gpu_src_arr = cp.asarray(src_arr)
@@ -70,7 +121,7 @@ def test_variable_sized_fields():
     )
 
     config = CreateKernelConfig(target=Target.GPU)
-    ast = create_kernel(sympy_cse_on_assignment_list([update_rule]), config=config)
+    ast = create_kernel(update_rule, config=config)
     kernel = ast.compile()
 
     size = (3, 3)
@@ -142,7 +193,7 @@ def test_ghost_layer():
     config.ghost_layers = ghost_layers
     config.gpu.indexing_scheme = "blockwise4d"
 
-    ast = create_kernel(sympy_cse_on_assignment_list([update_rule]), config=config)
+    ast = create_kernel(update_rule, config=config)
     kernel = ast.compile()
 
     gpu_src_arr = cp.asarray(src_arr)
@@ -170,8 +221,8 @@ def test_setting_value():
     config.target = Target.CUDA
     config.iteration_slice = iteration_slice
     config.gpu.indexing_scheme = "blockwise4d"
-
-    ast = create_kernel(sympy_cse_on_assignment_list(update_rule), config=config)
+
+    ast = create_kernel(update_rule, config=config)
     kernel = ast.compile()
 
     kernel(f=arr_gpu, value=np.float64(42.0))