Skip to content
Snippets Groups Projects

Add SVE nontemporal stores and scatters, including masked variants

Merged Michael Kuron requested to merge sve into master
@@ -327,8 +327,7 @@ class CBackend:
stride, printed_mask, **self._kwargs) + ';'
pre_code = ''
blended = self._vector_instruction_set['blendv'].split('{')[0] in self._vector_instruction_set[instr]
if nontemporal and 'cachelineZero' in self._vector_instruction_set and not blended:
if nontemporal and 'cachelineZero' in self._vector_instruction_set and mask == True: # NOQA
first_cond = f"((uintptr_t) {ptr} & {CachelineSize.mask_symbol}) == 0"
offset = sp.Add(*[sp.Symbol(LoopOverCoordinate.get_loop_counter_name(i))
* node.lhs.args[0].field.spatial_strides[i] for i in
@@ -348,7 +347,7 @@ class CBackend:
code2 = self._vector_instruction_set['flushCacheline'].format(
ptr, self.sympy_printer.doprint(rhs), **self._kwargs) + ';'
code = f"{code}\nif ({flushcond}) {{\n\t{code2}\n}}"
elif nontemporal and 'storeAAndFlushCacheline' in self._vector_instruction_set:
elif aligned and nontemporal and 'storeAAndFlushCacheline' in self._vector_instruction_set:
lhs_hash = hashlib.sha1(self.sympy_printer.doprint(node.lhs).encode('ascii')).hexdigest()[:8]
rhs_hash = hashlib.sha1(self.sympy_printer.doprint(rhs).encode('ascii')).hexdigest()[:8]
tmpvar = f'_tmp_{lhs_hash}_{rhs_hash}'
@@ -359,7 +358,7 @@ class CBackend:
if instr2 not in self._vector_instruction_set:
self._vector_instruction_set[instr2] = self._vector_instruction_set['storeAAndFlushCacheline'] \
.format('{0}', self._vector_instruction_set['blendv'].format(
self._vector_instruction_set[load].format('{0}', **self._kwargs),
self._vector_instruction_set['loadA'].format('{0}', **self._kwargs),
'{1}', '{2}', **self._kwargs), **self._kwargs)
code2 = self._vector_instruction_set[instr2].format(ptr, tmpvar, printed_mask, **self._kwargs) + ';'
code += f"\nif ({flushcond}) {{\n\t{code2}\n}} else {{\n\t{code1}\n}}"