Skip to content
Snippets Groups Projects

Add SVE nontemporal stores and scatters, including masked variants

Merged Michael Kuron requested to merge sve into master
Files
2
@@ -280,7 +280,8 @@ class CBackend:
if type(lhs_type) is VectorType and isinstance(node.lhs, CastFunc):
arg, data_type, aligned, nontemporal, mask, stride = node.lhs.args
instr = 'storeU'
-if nontemporal and 'storeA' not in self._vector_instruction_set and 'stream' in self._vector_instruction_set:
+if nontemporal and 'storeA' not in self._vector_instruction_set and \
+'stream' in self._vector_instruction_set:
instr = 'stream'
elif aligned:
instr = 'stream' if nontemporal and 'stream' in self._vector_instruction_set else 'storeA'
@@ -354,8 +355,13 @@ class CBackend:
code = 'const ' + self._print(node.lhs.dtype).replace(' const', '') + ' ' + tmpvar + ' = ' \
+ self.sympy_printer.doprint(rhs) + ';'
code1 = self._vector_instruction_set[instr].format(ptr, tmpvar, printed_mask, **self._kwargs) + ';'
-code2 = self._vector_instruction_set['storeAAndFlushCacheline'].format(ptr, tmpvar, printed_mask,
-**self._kwargs) + ';'
+instr2 = 'maskStoreAAndFlushCacheline' if mask != True else 'storeAAndFlushCacheline' # NOQA
+if instr2 not in self._vector_instruction_set:
+self._vector_instruction_set[instr2] = self._vector_instruction_set['storeAAndFlushCacheline'] \
+.format('{0}', self._vector_instruction_set['blendv'].format(
+self._vector_instruction_set[load].format('{0}', **self._kwargs),
+'{1}', '{2}', **self._kwargs), **self._kwargs)
+code2 = self._vector_instruction_set[instr2].format(ptr, tmpvar, printed_mask, **self._kwargs) + ';'
code += f"\nif ({flushcond}) {{\n\t{code2}\n}} else {{\n\t{code1}\n}}"
return pre_code + code
else: