Skip to content
Snippets Groups Projects
Commit 8f92d147 authored by Michael Kuron
Browse files

remove stream from instruction sets that don't have it

parent bcd2d628
Branches
Tags
1 merge request: !230 "Improve non-temporal stores"
Pipeline #31347 passed
...@@ -28,7 +28,6 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon'): ...@@ -28,7 +28,6 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon'):
'loadA': 'ld1[0]', 'loadA': 'ld1[0]',
'storeU': 'st1[0, 1]', 'storeU': 'st1[0, 1]',
'storeA': 'st1[0, 1]', 'storeA': 'st1[0, 1]',
'stream': 'st1[0, 1]',
'abs': 'abs[0]', 'abs': 'abs[0]',
'==': 'ceq[0, 1]', '==': 'ceq[0, 1]',
......
...@@ -259,7 +259,7 @@ class CBackend: ...@@ -259,7 +259,7 @@ class CBackend:
arg, data_type, aligned, nontemporal, mask = node.lhs.args arg, data_type, aligned, nontemporal, mask = node.lhs.args
instr = 'storeU' instr = 'storeU'
if aligned: if aligned:
instr = 'stream' if nontemporal else 'storeA' instr = 'stream' if nontemporal and 'stream' in self._vector_instruction_set else 'storeA'
if mask != True: # NOQA if mask != True: # NOQA
instr = 'maskStore' if aligned else 'maskStoreU' instr = 'maskStore' if aligned else 'maskStoreU'
printed_mask = self.sympy_printer.doprint(mask) printed_mask = self.sympy_printer.doprint(mask)
...@@ -274,18 +274,18 @@ class CBackend: ...@@ -274,18 +274,18 @@ class CBackend:
ptr = "&" + self.sympy_printer.doprint(node.lhs.args[0]) ptr = "&" + self.sympy_printer.doprint(node.lhs.args[0])
pre_code = '' pre_code = ''
if instr == 'stream' and 'cachelineZero' in self._vector_instruction_set: if nontemporal and 'cachelineZero' in self._vector_instruction_set:
pre_code = f"if (((uintptr_t) {ptr} & {CachelineSize.mask_symbol}) == 0) " + "{\n\t" + \ pre_code = f"if (((uintptr_t) {ptr} & {CachelineSize.mask_symbol}) == 0) " + "{\n\t" + \
self._vector_instruction_set['cachelineZero'].format(ptr) + ';\n}\n' self._vector_instruction_set['cachelineZero'].format(ptr) + ';\n}\n'
code = self._vector_instruction_set[instr].format(ptr, self.sympy_printer.doprint(rhs), code = self._vector_instruction_set[instr].format(ptr, self.sympy_printer.doprint(rhs),
printed_mask) + ';' printed_mask) + ';'
flushcond = f"((uintptr_t) {ptr} & {CachelineSize.mask_symbol}) != {CachelineSize.last_symbol}" flushcond = f"((uintptr_t) {ptr} & {CachelineSize.mask_symbol}) != {CachelineSize.last_symbol}"
if instr == 'stream' and 'flushCacheline' in self._vector_instruction_set: if nontemporal and 'flushCacheline' in self._vector_instruction_set:
code2 = self._vector_instruction_set['flushCacheline'].format( code2 = self._vector_instruction_set['flushCacheline'].format(
ptr, self.sympy_printer.doprint(rhs)) + ';' ptr, self.sympy_printer.doprint(rhs)) + ';'
code = f"{code}\nif ({flushcond}) {{\n\t{code2}\n}}" code = f"{code}\nif ({flushcond}) {{\n\t{code2}\n}}"
elif instr == 'stream' and 'streamAndFlushCacheline' in self._vector_instruction_set: elif nontemporal and 'streamAndFlushCacheline' in self._vector_instruction_set:
tmpvar = '_tmp_' + hashlib.sha1(self.sympy_printer.doprint(rhs).encode('ascii')).hexdigest()[:8] tmpvar = '_tmp_' + hashlib.sha1(self.sympy_printer.doprint(rhs).encode('ascii')).hexdigest()[:8]
code = 'const ' + self._print(node.lhs.dtype).replace(' const', '') + ' ' + tmpvar + ' = ' \ code = 'const ' + self._print(node.lhs.dtype).replace(' const', '') + ' ' + tmpvar + ' = ' \
+ self.sympy_printer.doprint(rhs) + ';' + self.sympy_printer.doprint(rhs) + ';'
......
...@@ -29,7 +29,6 @@ def get_vector_instruction_set_ppc(data_type='double', instruction_set='vsx'): ...@@ -29,7 +29,6 @@ def get_vector_instruction_set_ppc(data_type='double', instruction_set='vsx'):
'loadA': 'ld[0x0, 0]', 'loadA': 'ld[0x0, 0]',
'storeU': 'xst[1, 0x0, 0]', 'storeU': 'xst[1, 0x0, 0]',
'storeA': 'st[1, 0x0, 0]', 'storeA': 'st[1, 0x0, 0]',
'stream': 'st[1, 0x0, 0]',
'streamAndFlushCacheline': 'stl[1, 0x0, 0]', 'streamAndFlushCacheline': 'stl[1, 0x0, 0]',
'abs': 'abs[0]', 'abs': 'abs[0]',
......
...@@ -48,14 +48,9 @@ def test_aligned_and_nt_stores(openmp=False): ...@@ -48,14 +48,9 @@ def test_aligned_and_nt_stores(openmp=False):
'assume_inner_stride_one': True} 'assume_inner_stride_one': True}
update_rule = [ps.Assignment(f.center(), 0.25 * (g[-1, 0] + g[1, 0] + g[0, -1] + g[0, 1]))] update_rule = [ps.Assignment(f.center(), 0.25 * (g[-1, 0] + g[1, 0] + g[0, -1] + g[0, 1]))]
ast = ps.create_kernel(update_rule, target=dh.default_target, cpu_vectorize_info=opt, cpu_openmp=openmp) ast = ps.create_kernel(update_rule, target=dh.default_target, cpu_vectorize_info=opt, cpu_openmp=openmp)
if 'streamFence' in ast.instruction_set: for instruction in ['stream', 'streamFence', 'cachelineZero', 'streamAndFlushCacheline', 'flushCacheline']:
assert ast.instruction_set['streamFence'] in ps.get_code_str(ast) if instruction in ast.instruction_set:
if 'cachelineZero' in ast.instruction_set: assert ast.instruction_set[instruction].split('{')[0] in ps.get_code_str(ast)
assert ast.instruction_set['cachelineZero'].split('{')[0] in ps.get_code_str(ast)
if 'streamAndFlushCacheline' in ast.instruction_set:
assert ast.instruction_set['streamAndFlushCacheline'].split('{')[0] in ps.get_code_str(ast)
if 'flushCacheline' in ast.instruction_set:
assert ast.instruction_set['flushCacheline'].split('{')[0] in ps.get_code_str(ast)
kernel = ast.compile() kernel = ast.compile()
dh.run_kernel(kernel) dh.run_kernel(kernel)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment