Skip to content
Snippets Groups Projects
Commit 686a3ad8 authored by Michael Kuron's avatar Michael Kuron :mortar_board:
Browse files

Vectorization tests: run with all available instruction sets, add test for maskStore

parent b1522533
No related branches found
No related tags found
1 merge request!233Vectorization: improve test coverage
Pipeline #31650 passed
...@@ -263,8 +263,16 @@ class CBackend: ...@@ -263,8 +263,16 @@ class CBackend:
if mask != True: # NOQA if mask != True: # NOQA
instr = 'maskStore' if aligned else 'maskStoreU' instr = 'maskStore' if aligned else 'maskStoreU'
printed_mask = self.sympy_printer.doprint(mask) printed_mask = self.sympy_printer.doprint(mask)
if self._vector_instruction_set['dataTypePrefix']['double'] == '__mm256d': if data_type.base_type.base_name == 'double':
printed_mask = f"_mm256_castpd_si256({printed_mask})" if self._vector_instruction_set['double'] == '__m256d':
printed_mask = f"_mm256_castpd_si256({printed_mask})"
elif self._vector_instruction_set['double'] == '__m128d':
printed_mask = f"_mm_castpd_si128({printed_mask})"
elif data_type.base_type.base_name == 'float':
if self._vector_instruction_set['float'] == '__m256':
printed_mask = f"_mm256_castps_si256({printed_mask})"
elif self._vector_instruction_set['float'] == '__m128':
printed_mask = f"_mm_castps_si128({printed_mask})"
rhs_type = get_type_of_expression(node.rhs) rhs_type = get_type_of_expression(node.rhs)
if type(rhs_type) is not VectorType: if type(rhs_type) is not VectorType:
......
...@@ -57,23 +57,9 @@ def get_vector_instruction_set_x86(data_type='double', instruction_set='avx'): ...@@ -57,23 +57,9 @@ def get_vector_instruction_set_x86(data_type='double', instruction_set='avx'):
'storeU': 'storeu[0,1]', 'storeU': 'storeu[0,1]',
'storeA': 'store[0,1]', 'storeA': 'store[0,1]',
'stream': 'stream[0,1]', 'stream': 'stream[0,1]',
'maskstore': 'mask_store[0, 2, 1]' if instruction_set == 'avx512' else 'maskstore[0, 2, 1]', 'maskStore': 'mask_store[0, 2, 1]' if instruction_set == 'avx512' else 'maskstore[0, 2, 1]',
'maskload': 'mask_load[0, 2, 1]' if instruction_set == 'avx512' else 'maskload[0, 2, 1]' 'maskStoreU': 'mask_storeu[0, 2, 1]' if instruction_set == 'avx512' else 'maskstore[0, 2, 1]',
} }
if instruction_set == 'avx512':
base_names.update({
'maskStore': 'mask_store[0, 2, 1]',
'maskStoreU': 'mask_storeu[0, 2, 1]',
'maskLoad': 'mask_load[2, 1, 0]',
'maskLoadU': 'mask_loadu[2, 1, 0]'
})
if instruction_set == 'avx':
base_names.update({
'maskStore': 'maskstore[0, 2, 1]',
'maskStoreU': 'maskstore[0, 2, 1]',
'maskLoad': 'maskload[0, 1]',
'maskLoadU': 'maskloadu[0, 1]'
})
for comparison_op, constant in comparisons.items(): for comparison_op, constant in comparisons.items():
base_names[comparison_op] = f'cmp[0, 1, {constant}]' base_names[comparison_op] = f'cmp[0, 1, {constant}]'
......
...@@ -75,3 +75,26 @@ def test_boolean_before_loop(): ...@@ -75,3 +75,26 @@ def test_boolean_before_loop():
np.testing.assert_array_equal(g_arr, 1.0) np.testing.assert_array_equal(g_arr, 1.0)
kernel(f=f_arr, g=g_arr, t2=-1.0) kernel(f=f_arr, g=g_arr, t2=-1.0)
np.testing.assert_array_equal(g_arr, 42.0) np.testing.assert_array_equal(g_arr, 42.0)
@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
@pytest.mark.parametrize('dtype', ('float', 'double'))
def test_vec_maskstore(instruction_set, dtype):
    """A vectorized conditional write must only touch cells where the
    condition holds (exercises the maskStore/maskStoreU code path)."""
    if instruction_set in ('neon', 'vsx'):
        pytest.skip('no mask-store instructions available')
    numpy_dtype = np.float64 if dtype == 'double' else np.float32
    data_arr = np.zeros((16, 16), dtype=numpy_dtype)
    data_arr[4:-4, 4:-4] = 1.0  # interior ones surrounded by a zero border
    data = ps.fields(f"data: {dtype}[2D]", data=data_arr)
    # Overwrite every cell whose current value is below 1.0 with 2.0.
    update = [Conditional(data.center() < 1.0,
                          Block([ps.Assignment(data.center(), 2.0)]))]
    ast = ps.create_kernel(update, target='cpu',
                           cpu_vectorize_info={'instruction_set': instruction_set})
    ps.show_code(ast)
    kernel = ast.compile()
    kernel(data=data_arr)
    # Zero border rows must have been rewritten; the interior ones must
    # have been left untouched by the masked store.
    np.testing.assert_equal(data_arr[0:4, :], 2.0)
    np.testing.assert_equal(data_arr[4:-4, 4:-4], 1.0)
...@@ -14,7 +14,7 @@ else: ...@@ -14,7 +14,7 @@ else:
instruction_set = None instruction_set = None
def test_vector_type_propagation(): def test_vector_type_propagation(instruction_set=instruction_set):
a, b, c, d, e = sp.symbols("a b c d e") a, b, c, d, e = sp.symbols("a b c d e")
arr = np.ones((2 ** 2 + 2, 2 ** 3 + 2)) arr = np.ones((2 ** 2 + 2, 2 ** 3 + 2))
arr *= 10.0 arr *= 10.0
...@@ -33,7 +33,7 @@ def test_vector_type_propagation(): ...@@ -33,7 +33,7 @@ def test_vector_type_propagation():
np.testing.assert_equal(dst[1:-1, 1:-1], 2 * 10.0 + 3) np.testing.assert_equal(dst[1:-1, 1:-1], 2 * 10.0 + 3)
def test_aligned_and_nt_stores(openmp=False): def test_aligned_and_nt_stores(instruction_set=instruction_set, openmp=False):
domain_size = (24, 24) domain_size = (24, 24)
# create a datahandling object # create a datahandling object
dh = ps.create_data_handling(domain_size, periodicity=(True, True), parallel=False, default_target='cpu') dh = ps.create_data_handling(domain_size, periodicity=(True, True), parallel=False, default_target='cpu')
...@@ -63,11 +63,11 @@ def test_aligned_and_nt_stores(openmp=False): ...@@ -63,11 +63,11 @@ def test_aligned_and_nt_stores(openmp=False):
dh.run_kernel(kernel) dh.run_kernel(kernel)
np.testing.assert_equal(np.sum(dh.cpu_arrays['f']), np.prod(domain_size)) np.testing.assert_equal(np.sum(dh.cpu_arrays['f']), np.prod(domain_size))
def test_aligned_and_nt_stores_openmp(): def test_aligned_and_nt_stores_openmp(instruction_set=instruction_set):
test_aligned_and_nt_stores(True) test_aligned_and_nt_stores(instruction_set, True)
def test_inplace_update(): def test_inplace_update(instruction_set=instruction_set):
shape = (9, 9, 3) shape = (9, 9, 3)
arr = np.ones(shape, order='f') arr = np.ones(shape, order='f')
...@@ -88,7 +88,7 @@ def test_inplace_update(): ...@@ -88,7 +88,7 @@ def test_inplace_update():
np.testing.assert_equal(arr, 2) np.testing.assert_equal(arr, 2)
def test_vectorization_fixed_size(): def test_vectorization_fixed_size(instruction_set=instruction_set):
configurations = [] configurations = []
# Fixed size - multiple of four # Fixed size - multiple of four
arr = np.ones((20 + 2, 24 + 2)) * 5.0 arr = np.ones((20 + 2, 24 + 2)) * 5.0
...@@ -115,7 +115,7 @@ def test_vectorization_fixed_size(): ...@@ -115,7 +115,7 @@ def test_vectorization_fixed_size():
np.testing.assert_equal(dst[1:-1, 1:-1], 5 * 5.0 + 42.0) np.testing.assert_equal(dst[1:-1, 1:-1], 5 * 5.0 + 42.0)
def test_vectorization_variable_size(): def test_vectorization_variable_size(instruction_set=instruction_set):
f, g = ps.fields("f, g : double[2D]") f, g = ps.fields("f, g : double[2D]")
update_rule = [ps.Assignment(g[0, 0], f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0)] update_rule = [ps.Assignment(g[0, 0], f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0)]
ast = ps.create_kernel(update_rule) ast = ps.create_kernel(update_rule)
...@@ -131,7 +131,7 @@ def test_vectorization_variable_size(): ...@@ -131,7 +131,7 @@ def test_vectorization_variable_size():
np.testing.assert_equal(dst[1:-1, 1:-1], 5 * 5.0 + 42.0) np.testing.assert_equal(dst[1:-1, 1:-1], 5 * 5.0 + 42.0)
def test_piecewise1(): def test_piecewise1(instruction_set=instruction_set):
a, b, c, d, e = sp.symbols("a b c d e") a, b, c, d, e = sp.symbols("a b c d e")
arr = np.ones((2 ** 3 + 2, 2 ** 4 + 2)) * 5.0 arr = np.ones((2 ** 3 + 2, 2 ** 4 + 2)) * 5.0
...@@ -149,7 +149,7 @@ def test_piecewise1(): ...@@ -149,7 +149,7 @@ def test_piecewise1():
np.testing.assert_equal(dst[1:-1, 1:-1], 5 + 3 + 5.0) np.testing.assert_equal(dst[1:-1, 1:-1], 5 + 3 + 5.0)
def test_piecewise2(): def test_piecewise2(instruction_set=instruction_set):
arr = np.zeros((20, 20)) arr = np.zeros((20, 20))
@ps.kernel @ps.kernel
...@@ -167,7 +167,7 @@ def test_piecewise2(): ...@@ -167,7 +167,7 @@ def test_piecewise2():
np.testing.assert_equal(arr, np.ones_like(arr)) np.testing.assert_equal(arr, np.ones_like(arr))
def test_piecewise3(): def test_piecewise3(instruction_set=instruction_set):
arr = np.zeros((22, 22)) arr = np.zeros((22, 22))
@ps.kernel @ps.kernel
...@@ -181,7 +181,7 @@ def test_piecewise3(): ...@@ -181,7 +181,7 @@ def test_piecewise3():
ast.compile() ast.compile()
def test_logical_operators(): def test_logical_operators(instruction_set=instruction_set):
arr = np.zeros((22, 22)) arr = np.zeros((22, 22))
@ps.kernel @ps.kernel
...@@ -220,7 +220,7 @@ def test_hardware_query(): ...@@ -220,7 +220,7 @@ def test_hardware_query():
any([iset.startswith('sve') for iset in supported_instruction_sets]) any([iset.startswith('sve') for iset in supported_instruction_sets])
def test_vectorised_pow(): def test_vectorised_pow(instruction_set=instruction_set):
arr = np.zeros((24, 24)) arr = np.zeros((24, 24))
f, g = ps.fields(f=arr, g=arr) f, g = ps.fields(f=arr, g=arr)
...@@ -256,7 +256,7 @@ def test_vectorised_pow(): ...@@ -256,7 +256,7 @@ def test_vectorised_pow():
ast.compile() ast.compile()
def test_vectorised_fast_approximations(): def test_vectorised_fast_approximations(instruction_set=instruction_set):
arr = np.zeros((24, 24)) arr = np.zeros((24, 24))
f, g = ps.fields(f=arr, g=arr) f, g = ps.fields(f=arr, g=arr)
......
...@@ -57,15 +57,13 @@ def test_vectorized_abs(instruction_set, dtype): ...@@ -57,15 +57,13 @@ def test_vectorized_abs(instruction_set, dtype):
@pytest.mark.parametrize('instruction_set', supported_instruction_sets) @pytest.mark.parametrize('instruction_set', supported_instruction_sets)
@pytest.mark.parametrize('gl_field, gl_kernel', [(1, 0), (0, 1), (1, 1)]) @pytest.mark.parametrize('gl_field, gl_kernel', [(1, 0), (0, 1), (1, 1)])
def test_alignment_and_correct_ghost_layers(gl_field, gl_kernel, instruction_set, dtype): def test_alignment_and_correct_ghost_layers(gl_field, gl_kernel, instruction_set, dtype):
itemsize = 8 if dtype == 'double' else 4
alignment = get_vector_instruction_set(dtype, instruction_set)['width'] * itemsize
dtype = np.float64 if dtype == 'double' else np.float32 dtype = np.float64 if dtype == 'double' else np.float32
domain_size = (128, 128) domain_size = (128, 128)
dh = ps.create_data_handling(domain_size, periodicity=(True, True), default_target='cpu') dh = ps.create_data_handling(domain_size, periodicity=(True, True), default_target='cpu')
src = dh.add_array("src", values_per_cell=1, dtype=dtype, ghost_layers=gl_field, alignment=alignment) src = dh.add_array("src", values_per_cell=1, dtype=dtype, ghost_layers=gl_field, alignment=True)
dh.fill(src.name, 1.0, ghost_layers=True) dh.fill(src.name, 1.0, ghost_layers=True)
dst = dh.add_array("dst", values_per_cell=1, dtype=dtype, ghost_layers=gl_field, alignment=alignment) dst = dh.add_array("dst", values_per_cell=1, dtype=dtype, ghost_layers=gl_field, alignment=True)
dh.fill(dst.name, 1.0, ghost_layers=True) dh.fill(dst.name, 1.0, ghost_layers=True)
update_rule = ps.Assignment(dst[0, 0], src[0, 0]) update_rule = ps.Assignment(dst[0, 0], src[0, 0])
...@@ -90,3 +88,11 @@ def test_cacheline_size(instruction_set): ...@@ -90,3 +88,11 @@ def test_cacheline_size(instruction_set):
assert cacheline_size > 8 and cacheline_size < 0x100000, "Cache line size is implausible" assert cacheline_size > 8 and cacheline_size < 0x100000, "Cache line size is implausible"
assert cacheline_size % vector_size == 0, "Cache line size should be multiple of vector size" assert cacheline_size % vector_size == 0, "Cache line size should be multiple of vector size"
assert cacheline_size & (cacheline_size - 1) == 0, "Cache line size is not a power of 2" assert cacheline_size & (cacheline_size - 1) == 0, "Cache line size is not a power of 2"
# test_vectorization is not parametrized because it is supposed to run without pytest, so we parametrize it here
from pystencils_tests import test_vectorization


# Re-run every test_* function from the (unparametrized) test_vectorization
# module once per instruction set, skipping the set that module already uses
# by default; test_hardware_query is excluded (it is not written to accept an
# instruction_set argument — see its definition in test_vectorization).
@pytest.mark.parametrize('instruction_set', set(supported_instruction_sets) - set([test_vectorization.instruction_set]))
@pytest.mark.parametrize('function', [f for f in test_vectorization.__dict__ if f.startswith('test_') and f != 'test_hardware_query'])
def test_vectorization_other(instruction_set, function):
    # Look the test function up by name and forward the instruction set to it.
    test_vectorization.__dict__[function](instruction_set)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment