Skip to content
Snippets Groups Projects
Commit 686a3ad8 authored by Michael Kuron's avatar Michael Kuron :mortar_board:
Browse files

Vectorization tests: run with all available instruction sets, add test for maskStore

parent b1522533
No related branches found
No related tags found
1 merge request!233Vectorization: improve test coverage
Pipeline #31650 passed
...@@ -263,8 +263,16 @@ class CBackend: ...@@ -263,8 +263,16 @@ class CBackend:
if mask != True: # NOQA if mask != True: # NOQA
instr = 'maskStore' if aligned else 'maskStoreU' instr = 'maskStore' if aligned else 'maskStoreU'
printed_mask = self.sympy_printer.doprint(mask) printed_mask = self.sympy_printer.doprint(mask)
if self._vector_instruction_set['dataTypePrefix']['double'] == '__mm256d': if data_type.base_type.base_name == 'double':
printed_mask = f"_mm256_castpd_si256({printed_mask})" if self._vector_instruction_set['double'] == '__m256d':
printed_mask = f"_mm256_castpd_si256({printed_mask})"
elif self._vector_instruction_set['double'] == '__m128d':
printed_mask = f"_mm_castpd_si128({printed_mask})"
elif data_type.base_type.base_name == 'float':
if self._vector_instruction_set['float'] == '__m256':
printed_mask = f"_mm256_castps_si256({printed_mask})"
elif self._vector_instruction_set['float'] == '__m128':
printed_mask = f"_mm_castps_si128({printed_mask})"
rhs_type = get_type_of_expression(node.rhs) rhs_type = get_type_of_expression(node.rhs)
if type(rhs_type) is not VectorType: if type(rhs_type) is not VectorType:
......
...@@ -57,23 +57,9 @@ def get_vector_instruction_set_x86(data_type='double', instruction_set='avx'): ...@@ -57,23 +57,9 @@ def get_vector_instruction_set_x86(data_type='double', instruction_set='avx'):
'storeU': 'storeu[0,1]', 'storeU': 'storeu[0,1]',
'storeA': 'store[0,1]', 'storeA': 'store[0,1]',
'stream': 'stream[0,1]', 'stream': 'stream[0,1]',
'maskstore': 'mask_store[0, 2, 1]' if instruction_set == 'avx512' else 'maskstore[0, 2, 1]', 'maskStore': 'mask_store[0, 2, 1]' if instruction_set == 'avx512' else 'maskstore[0, 2, 1]',
'maskload': 'mask_load[0, 2, 1]' if instruction_set == 'avx512' else 'maskload[0, 2, 1]' 'maskStoreU': 'mask_storeu[0, 2, 1]' if instruction_set == 'avx512' else 'maskstore[0, 2, 1]',
} }
if instruction_set == 'avx512':
base_names.update({
'maskStore': 'mask_store[0, 2, 1]',
'maskStoreU': 'mask_storeu[0, 2, 1]',
'maskLoad': 'mask_load[2, 1, 0]',
'maskLoadU': 'mask_loadu[2, 1, 0]'
})
if instruction_set == 'avx':
base_names.update({
'maskStore': 'maskstore[0, 2, 1]',
'maskStoreU': 'maskstore[0, 2, 1]',
'maskLoad': 'maskload[0, 1]',
'maskLoadU': 'maskloadu[0, 1]'
})
for comparison_op, constant in comparisons.items(): for comparison_op, constant in comparisons.items():
base_names[comparison_op] = f'cmp[0, 1, {constant}]' base_names[comparison_op] = f'cmp[0, 1, {constant}]'
......
...@@ -75,3 +75,26 @@ def test_boolean_before_loop(): ...@@ -75,3 +75,26 @@ def test_boolean_before_loop():
np.testing.assert_array_equal(g_arr, 1.0) np.testing.assert_array_equal(g_arr, 1.0)
kernel(f=f_arr, g=g_arr, t2=-1.0) kernel(f=f_arr, g=g_arr, t2=-1.0)
np.testing.assert_array_equal(g_arr, 42.0) np.testing.assert_array_equal(g_arr, 42.0)
@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
@pytest.mark.parametrize('dtype', ('float', 'double'))
def test_vec_maskstore(instruction_set, dtype):
    """A vectorized conditional write must only touch cells where the
    condition holds (exercises the maskStore/maskStoreU code path)."""
    if instruction_set in ('neon', 'vsx'):
        pytest.skip('no mask-store instructions available')
    numpy_dtype = np.float64 if dtype == 'double' else np.float32
    data_arr = np.zeros((16, 16), dtype=numpy_dtype)
    data_arr[4:-4, 4:-4] = 1.0  # interior ones surrounded by a zero border
    data = ps.fields(f"data: {dtype}[2D]", data=data_arr)
    # Overwrite every cell whose current value is below 1.0 with 2.0.
    update = [Conditional(data.center() < 1.0,
                          Block([ps.Assignment(data.center(), 2.0)]))]
    ast = ps.create_kernel(update, target='cpu',
                           cpu_vectorize_info={'instruction_set': instruction_set})
    ps.show_code(ast)
    kernel = ast.compile()
    kernel(data=data_arr)
    # Zero border rows must have been rewritten; the interior ones must
    # have been left untouched by the masked store.
    np.testing.assert_equal(data_arr[0:4, :], 2.0)
    np.testing.assert_equal(data_arr[4:-4, 4:-4], 1.0)
...@@ -14,7 +14,7 @@ else: ...@@ -14,7 +14,7 @@ else:
instruction_set = None instruction_set = None
def test_vector_type_propagation(): def test_vector_type_propagation(instruction_set=instruction_set):
a, b, c, d, e = sp.symbols("a b c d e") a, b, c, d, e = sp.symbols("a b c d e")
arr = np.ones((2 ** 2 + 2, 2 ** 3 + 2)) arr = np.ones((2 ** 2 + 2, 2 ** 3 + 2))
arr *= 10.0 arr *= 10.0
...@@ -33,7 +33,7 @@ def test_vector_type_propagation(): ...@@ -33,7 +33,7 @@ def test_vector_type_propagation():
np.testing.assert_equal(dst[1:-1, 1:-1], 2 * 10.0 + 3) np.testing.assert_equal(dst[1:-1, 1:-1], 2 * 10.0 + 3)
def test_aligned_and_nt_stores(openmp=False): def test_aligned_and_nt_stores(instruction_set=instruction_set, openmp=False):
domain_size = (24, 24) domain_size = (24, 24)
# create a datahandling object # create a datahandling object
dh = ps.create_data_handling(domain_size, periodicity=(True, True), parallel=False, default_target='cpu') dh = ps.create_data_handling(domain_size, periodicity=(True, True), parallel=False, default_target='cpu')
...@@ -63,11 +63,11 @@ def test_aligned_and_nt_stores(openmp=False): ...@@ -63,11 +63,11 @@ def test_aligned_and_nt_stores(openmp=False):
dh.run_kernel(kernel) dh.run_kernel(kernel)
np.testing.assert_equal(np.sum(dh.cpu_arrays['f']), np.prod(domain_size)) np.testing.assert_equal(np.sum(dh.cpu_arrays['f']), np.prod(domain_size))
def test_aligned_and_nt_stores_openmp(): def test_aligned_and_nt_stores_openmp(instruction_set=instruction_set):
test_aligned_and_nt_stores(True) test_aligned_and_nt_stores(instruction_set, True)
def test_inplace_update(): def test_inplace_update(instruction_set=instruction_set):
shape = (9, 9, 3) shape = (9, 9, 3)
arr = np.ones(shape, order='f') arr = np.ones(shape, order='f')
...@@ -88,7 +88,7 @@ def test_inplace_update(): ...@@ -88,7 +88,7 @@ def test_inplace_update():
np.testing.assert_equal(arr, 2) np.testing.assert_equal(arr, 2)
def test_vectorization_fixed_size(): def test_vectorization_fixed_size(instruction_set=instruction_set):
configurations = [] configurations = []
# Fixed size - multiple of four # Fixed size - multiple of four
arr = np.ones((20 + 2, 24 + 2)) * 5.0 arr = np.ones((20 + 2, 24 + 2)) * 5.0
...@@ -115,7 +115,7 @@ def test_vectorization_fixed_size(): ...@@ -115,7 +115,7 @@ def test_vectorization_fixed_size():
np.testing.assert_equal(dst[1:-1, 1:-1], 5 * 5.0 + 42.0) np.testing.assert_equal(dst[1:-1, 1:-1], 5 * 5.0 + 42.0)
def test_vectorization_variable_size(): def test_vectorization_variable_size(instruction_set=instruction_set):
f, g = ps.fields("f, g : double[2D]") f, g = ps.fields("f, g : double[2D]")
update_rule = [ps.Assignment(g[0, 0], f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0)] update_rule = [ps.Assignment(g[0, 0], f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0)]
ast = ps.create_kernel(update_rule) ast = ps.create_kernel(update_rule)
...@@ -131,7 +131,7 @@ def test_vectorization_variable_size(): ...@@ -131,7 +131,7 @@ def test_vectorization_variable_size():
np.testing.assert_equal(dst[1:-1, 1:-1], 5 * 5.0 + 42.0) np.testing.assert_equal(dst[1:-1, 1:-1], 5 * 5.0 + 42.0)
def test_piecewise1(): def test_piecewise1(instruction_set=instruction_set):
a, b, c, d, e = sp.symbols("a b c d e") a, b, c, d, e = sp.symbols("a b c d e")
arr = np.ones((2 ** 3 + 2, 2 ** 4 + 2)) * 5.0 arr = np.ones((2 ** 3 + 2, 2 ** 4 + 2)) * 5.0
...@@ -149,7 +149,7 @@ def test_piecewise1(): ...@@ -149,7 +149,7 @@ def test_piecewise1():
np.testing.assert_equal(dst[1:-1, 1:-1], 5 + 3 + 5.0) np.testing.assert_equal(dst[1:-1, 1:-1], 5 + 3 + 5.0)
def test_piecewise2(): def test_piecewise2(instruction_set=instruction_set):
arr = np.zeros((20, 20)) arr = np.zeros((20, 20))
@ps.kernel @ps.kernel
...@@ -167,7 +167,7 @@ def test_piecewise2(): ...@@ -167,7 +167,7 @@ def test_piecewise2():
np.testing.assert_equal(arr, np.ones_like(arr)) np.testing.assert_equal(arr, np.ones_like(arr))
def test_piecewise3(): def test_piecewise3(instruction_set=instruction_set):
arr = np.zeros((22, 22)) arr = np.zeros((22, 22))
@ps.kernel @ps.kernel
...@@ -181,7 +181,7 @@ def test_piecewise3(): ...@@ -181,7 +181,7 @@ def test_piecewise3():
ast.compile() ast.compile()
def test_logical_operators(): def test_logical_operators(instruction_set=instruction_set):
arr = np.zeros((22, 22)) arr = np.zeros((22, 22))
@ps.kernel @ps.kernel
...@@ -220,7 +220,7 @@ def test_hardware_query(): ...@@ -220,7 +220,7 @@ def test_hardware_query():
any([iset.startswith('sve') for iset in supported_instruction_sets]) any([iset.startswith('sve') for iset in supported_instruction_sets])
def test_vectorised_pow(): def test_vectorised_pow(instruction_set=instruction_set):
arr = np.zeros((24, 24)) arr = np.zeros((24, 24))
f, g = ps.fields(f=arr, g=arr) f, g = ps.fields(f=arr, g=arr)
...@@ -256,7 +256,7 @@ def test_vectorised_pow(): ...@@ -256,7 +256,7 @@ def test_vectorised_pow():
ast.compile() ast.compile()
def test_vectorised_fast_approximations(): def test_vectorised_fast_approximations(instruction_set=instruction_set):
arr = np.zeros((24, 24)) arr = np.zeros((24, 24))
f, g = ps.fields(f=arr, g=arr) f, g = ps.fields(f=arr, g=arr)
......
...@@ -57,15 +57,13 @@ def test_vectorized_abs(instruction_set, dtype): ...@@ -57,15 +57,13 @@ def test_vectorized_abs(instruction_set, dtype):
@pytest.mark.parametrize('instruction_set', supported_instruction_sets) @pytest.mark.parametrize('instruction_set', supported_instruction_sets)
@pytest.mark.parametrize('gl_field, gl_kernel', [(1, 0), (0, 1), (1, 1)]) @pytest.mark.parametrize('gl_field, gl_kernel', [(1, 0), (0, 1), (1, 1)])
def test_alignment_and_correct_ghost_layers(gl_field, gl_kernel, instruction_set, dtype): def test_alignment_and_correct_ghost_layers(gl_field, gl_kernel, instruction_set, dtype):
itemsize = 8 if dtype == 'double' else 4
alignment = get_vector_instruction_set(dtype, instruction_set)['width'] * itemsize
dtype = np.float64 if dtype == 'double' else np.float32 dtype = np.float64 if dtype == 'double' else np.float32
domain_size = (128, 128) domain_size = (128, 128)
dh = ps.create_data_handling(domain_size, periodicity=(True, True), default_target='cpu') dh = ps.create_data_handling(domain_size, periodicity=(True, True), default_target='cpu')
src = dh.add_array("src", values_per_cell=1, dtype=dtype, ghost_layers=gl_field, alignment=alignment) src = dh.add_array("src", values_per_cell=1, dtype=dtype, ghost_layers=gl_field, alignment=True)
dh.fill(src.name, 1.0, ghost_layers=True) dh.fill(src.name, 1.0, ghost_layers=True)
dst = dh.add_array("dst", values_per_cell=1, dtype=dtype, ghost_layers=gl_field, alignment=alignment) dst = dh.add_array("dst", values_per_cell=1, dtype=dtype, ghost_layers=gl_field, alignment=True)
dh.fill(dst.name, 1.0, ghost_layers=True) dh.fill(dst.name, 1.0, ghost_layers=True)
update_rule = ps.Assignment(dst[0, 0], src[0, 0]) update_rule = ps.Assignment(dst[0, 0], src[0, 0])
...@@ -90,3 +88,11 @@ def test_cacheline_size(instruction_set): ...@@ -90,3 +88,11 @@ def test_cacheline_size(instruction_set):
assert cacheline_size > 8 and cacheline_size < 0x100000, "Cache line size is implausible" assert cacheline_size > 8 and cacheline_size < 0x100000, "Cache line size is implausible"
assert cacheline_size % vector_size == 0, "Cache line size should be multiple of vector size" assert cacheline_size % vector_size == 0, "Cache line size should be multiple of vector size"
assert cacheline_size & (cacheline_size - 1) == 0, "Cache line size is not a power of 2" assert cacheline_size & (cacheline_size - 1) == 0, "Cache line size is not a power of 2"
# test_vectorization is not parametrized because it is supposed to run without pytest, so we parametrize it here
from pystencils_tests import test_vectorization


# Re-run every test_* function from the (unparametrized) test_vectorization
# module once per instruction set, skipping the set that module already uses
# by default; test_hardware_query is excluded (it is not written to accept an
# instruction_set argument — see its definition in test_vectorization).
@pytest.mark.parametrize('instruction_set', set(supported_instruction_sets) - set([test_vectorization.instruction_set]))
@pytest.mark.parametrize('function', [f for f in test_vectorization.__dict__ if f.startswith('test_') and f != 'test_hardware_query'])
def test_vectorization_other(instruction_set, function):
    # Look the test function up by name and forward the instruction set to it.
    test_vectorization.__dict__[function](instruction_set)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment