Skip to content
Snippets Groups Projects

Add SVE nontemporal stores and scatters, including masked variants

Merged Michael Kuron requested to merge sve into master
Files
2
@@ -280,12 +280,12 @@ class CBackend:
if type(lhs_type) is VectorType and isinstance(node.lhs, CastFunc):
arg, data_type, aligned, nontemporal, mask, stride = node.lhs.args
instr = 'storeU'
if aligned:
if nontemporal and 'storeA' not in self._vector_instruction_set and \
'stream' in self._vector_instruction_set:
instr = 'stream'
elif aligned:
instr = 'stream' if nontemporal and 'stream' in self._vector_instruction_set else 'storeA'
if mask != True: # NOQA
if nontemporal and 'storeA' not in self._vector_instruction_set:
# aligned is not set when the instruction set has no aligned store
aligned = True
instr = 'maskStream' if nontemporal and 'maskStream' in self._vector_instruction_set else \
'maskStoreA' if aligned else 'maskStoreU'
if instr not in self._vector_instruction_set:
@@ -321,7 +321,8 @@ class CBackend:
if stride != 1:
instr = ('maskStreamS' if nontemporal and 'maskStreamS' in self._vector_instruction_set else
'maskStoreS') if mask != True else ('streamS' if nontemporal else 'storeS') # NOQA
'maskStoreS') if mask != True else \
('streamS' if nontemporal and 'streamS' in self._vector_instruction_set else 'storeS') # NOQA
return self._vector_instruction_set[instr].format(ptr, self.sympy_printer.doprint(rhs),
stride, printed_mask, **self._kwargs) + ';'
@@ -354,8 +355,13 @@ class CBackend:
code = 'const ' + self._print(node.lhs.dtype).replace(' const', '') + ' ' + tmpvar + ' = ' \
+ self.sympy_printer.doprint(rhs) + ';'
code1 = self._vector_instruction_set[instr].format(ptr, tmpvar, printed_mask, **self._kwargs) + ';'
code2 = self._vector_instruction_set['storeAAndFlushCacheline'].format(ptr, tmpvar, printed_mask,
**self._kwargs) + ';'
instr2 = 'maskStoreAAndFlushCacheline' if mask != True else 'storeAAndFlushCacheline' # NOQA
if instr2 not in self._vector_instruction_set:
self._vector_instruction_set[instr2] = self._vector_instruction_set['storeAAndFlushCacheline'] \
.format('{0}', self._vector_instruction_set['blendv'].format(
self._vector_instruction_set[load].format('{0}', **self._kwargs),
'{1}', '{2}', **self._kwargs), **self._kwargs)
code2 = self._vector_instruction_set[instr2].format(ptr, tmpvar, printed_mask, **self._kwargs) + ';'
code += f"\nif ({flushcond}) {{\n\t{code2}\n}} else {{\n\t{code1}\n}}"
return pre_code + code
else: