Parametrize test_reduction_gpu over the assume_warp_aligned_block_size and use_block_fitting options

f469e70e · Richard Angersbach · b008a9e9 · f469e70e
Commit f469e70e authored 3 months ago by Richard Angersbach
--- a/tests/kernelcreation/test_reduction.py
+++ b/tests/kernelcreation/test_reduction.py
@@ -52,7 +52,14 @@ def test_reduction_cpu(instruction_set, dtype, op):

 @pytest.mark.parametrize("dtype", ["float64", "float32"])
 @pytest.mark.parametrize("op", ["+", "-", "*", "min", "max"])
-def test_reduction_gpu(dtype, op):
+@pytest.mark.parametrize("assume_warp_aligned_block_size", [True, False])
+@pytest.mark.parametrize("use_block_fitting", [True, False])
+def test_reduction_gpu(
+        dtype: str,
+        op: str,
+        assume_warp_aligned_block_size: bool,
+        use_block_fitting: bool,
+):
    try:
        import cupy as cp
        from cupy_backends.cuda.api.runtime import CUDARuntimeError
@@ -66,12 +73,16 @@ def test_reduction_gpu(dtype, op):
            reason="No CUDA capable device is detected", allow_module_level=True
        )

-    config = ps.CreateKernelConfig(target=ps.Target.GPU)
+    cfg = ps.CreateKernelConfig(target=ps.Target.GPU)
+    cfg.gpu.assume_warp_aligned_block_size = assume_warp_aligned_block_size

-    ast_reduction = get_reduction_assign_ast(dtype, op, config)
+    ast_reduction = get_reduction_assign_ast(dtype, op, cfg)
    ps.show_code(ast_reduction)
    kernel_reduction = ast_reduction.compile()

+    if use_block_fitting:
+        kernel_reduction.launch_config.fit_block_size((32, 1, 1))
+
    array = np.full((SIZE,), INIT_ARR, dtype=dtype)
    reduction_array = np.full((1,), INIT_W, dtype=dtype)