Skip to content
Snippets Groups Projects
Commit c331d24f authored by Michael Kuron
Browse files

maskStore improvements

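- fix the aligned version (the instruction-set key is now 'maskStoreA', matching 'maskStoreU')
- make sure the masked region in the test case is not a multiple of the vector width (border of 3 instead of 4 cells)
- implement a fallback (load, blendv, store) for instruction sets that don't support masked stores natively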
parent 059de5fb
Branches
Tags
No related merge requests found
...@@ -261,7 +261,11 @@ class CBackend: ...@@ -261,7 +261,11 @@ class CBackend:
if aligned: if aligned:
instr = 'stream' if nontemporal and 'stream' in self._vector_instruction_set else 'storeA' instr = 'stream' if nontemporal and 'stream' in self._vector_instruction_set else 'storeA'
if mask != True: # NOQA if mask != True: # NOQA
instr = 'maskStore' if aligned else 'maskStoreU' instr = 'maskStoreA' if aligned else 'maskStoreU'
if instr not in self._vector_instruction_set:
self._vector_instruction_set[instr] = self._vector_instruction_set['store' + instr[-1]].format(
'{0}', self._vector_instruction_set['blendv'].format(
self._vector_instruction_set['load' + instr[-1]].format('{0}'), '{1}', '{2}'))
printed_mask = self.sympy_printer.doprint(mask) printed_mask = self.sympy_printer.doprint(mask)
if data_type.base_type.base_name == 'double': if data_type.base_type.base_name == 'double':
if self._vector_instruction_set['double'] == '__m256d': if self._vector_instruction_set['double'] == '__m256d':
......
...@@ -57,7 +57,7 @@ def get_vector_instruction_set_x86(data_type='double', instruction_set='avx'): ...@@ -57,7 +57,7 @@ def get_vector_instruction_set_x86(data_type='double', instruction_set='avx'):
'storeU': 'storeu[0,1]', 'storeU': 'storeu[0,1]',
'storeA': 'store[0,1]', 'storeA': 'store[0,1]',
'stream': 'stream[0,1]', 'stream': 'stream[0,1]',
'maskStore': 'mask_store[0, 2, 1]' if instruction_set == 'avx512' else 'maskstore[0, 2, 1]', 'maskStoreA': 'mask_store[0, 2, 1]' if instruction_set == 'avx512' else 'maskstore[0, 2, 1]',
'maskStoreU': 'mask_storeu[0, 2, 1]' if instruction_set == 'avx512' else 'maskstore[0, 2, 1]', 'maskStoreU': 'mask_storeu[0, 2, 1]' if instruction_set == 'avx512' else 'maskstore[0, 2, 1]',
} }
......
...@@ -80,10 +80,8 @@ def test_boolean_before_loop(): ...@@ -80,10 +80,8 @@ def test_boolean_before_loop():
@pytest.mark.parametrize('instruction_set', supported_instruction_sets) @pytest.mark.parametrize('instruction_set', supported_instruction_sets)
@pytest.mark.parametrize('dtype', ('float', 'double')) @pytest.mark.parametrize('dtype', ('float', 'double'))
def test_vec_maskstore(instruction_set, dtype): def test_vec_maskstore(instruction_set, dtype):
if instruction_set in ['neon', 'vsx']:
pytest.skip('no mask-store instructions available')
data_arr = np.zeros((16, 16), dtype=np.float64 if dtype == 'double' else np.float32) data_arr = np.zeros((16, 16), dtype=np.float64 if dtype == 'double' else np.float32)
data_arr[4:-4, 4:-4] = 1.0 data_arr[3:-3, 3:-3] = 1.0
data = ps.fields(f"data: {dtype}[2D]", data=data_arr) data = ps.fields(f"data: {dtype}[2D]", data=data_arr)
c = [ c = [
...@@ -93,8 +91,10 @@ def test_vec_maskstore(instruction_set, dtype): ...@@ -93,8 +91,10 @@ def test_vec_maskstore(instruction_set, dtype):
] ]
ast = ps.create_kernel(c, target='cpu', ast = ps.create_kernel(c, target='cpu',
cpu_vectorize_info={'instruction_set': instruction_set}) cpu_vectorize_info={'instruction_set': instruction_set})
ps.show_code(ast)
kernel = ast.compile() kernel = ast.compile()
kernel(data=data_arr) kernel(data=data_arr)
np.testing.assert_equal(data_arr[0:4, :], 2.0) np.testing.assert_equal(data_arr[:3, :], 2.0)
np.testing.assert_equal(data_arr[4:-4, 4:-4], 1.0) np.testing.assert_equal(data_arr[-3:, :], 2.0)
np.testing.assert_equal(data_arr[:, :3], 2.0)
np.testing.assert_equal(data_arr[:, -3:], 2.0)
np.testing.assert_equal(data_arr[3:-3, 3:-3], 1.0)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment