Skip to content
Snippets Groups Projects
Commit 40b3d172 authored by Markus Holzer
Browse files

Use all written field sizes only in special case

parent ff36a6b6
No related branches found
No related tags found
1 merge request: !302 "Regression !300"
......@@ -193,6 +193,10 @@ class KernelFunction(Node):
# function that compiles the node to a Python callable, is set by the backends
self._compile_function = compile_function
self.assignments = assignments
# When nontemporal stores are activated together with the Neon instruction set, cacheline zeroing is used.
# Cacheline zeroing needs the size of every written field. Thus, in this case the sizes of all written
# fields become kernel parameters, not just the common field size used for the loops.
self.use_all_written_field_sizes = False
@property
def target(self):
......@@ -233,7 +237,8 @@ class KernelFunction(Node):
@property
def fields_written(self) -> Set[Field]:
assignments = self.atoms(SympyAssignment)
return {a.lhs.field for a in assignments if isinstance(a.lhs, ResolvedFieldAccess)}
return set().union(itertools.chain.from_iterable([f.field for f in a.lhs.free_symbols if hasattr(f, 'field')]
for a in assignments))
@property
def fields_read(self) -> Set[Field]:
......@@ -247,6 +252,11 @@ class KernelFunction(Node):
This function is expensive, cache the result where possible!
"""
field_map = {f.name: f for f in self.fields_accessed}
sizes = set()
if self.use_all_written_field_sizes:
sizes = set().union(*(a.shape[:a.spatial_dimensions] for a in self.fields_written))
sizes = filter(lambda s: isinstance(s, FieldShapeSymbol), sizes)
def get_fields(symbol):
if hasattr(symbol, 'field_name'):
......@@ -256,6 +266,7 @@ class KernelFunction(Node):
return ()
argument_symbols = self._body.undefined_symbols - self.global_variables
argument_symbols.update(sizes)
parameters = [self.Parameter(symbol, get_fields(symbol)) for symbol in argument_symbols]
if hasattr(self, 'indexing'):
parameters += [self.Parameter(s, []) for s in self.indexing.symbolic_parameters()]
......@@ -621,12 +632,7 @@ class SympyAssignment(Node):
result.update(loop_counters)
result.update(self._lhs_symbol.atoms(sp.Symbol))
sizes = set().union(*(a.field.shape[:a.field.spatial_dimensions]
for a in self._lhs_symbol.atoms(ResolvedFieldAccess)))
sizes = filter(lambda s: isinstance(s, FieldShapeSymbol), sizes)
result.update(sizes)
return result
@property
......
......@@ -127,6 +127,9 @@ def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'best',
vector_is = get_vector_instruction_set(default_float_type, instruction_set=instruction_set)
kernel_ast.instruction_set = vector_is
if nontemporal and 'cachelineZero' in vector_is:
kernel_ast.use_all_written_field_sizes = True
strided = 'storeS' in vector_is and 'loadS' in vector_is
keep_loop_stop = '{loop_stop}' in vector_is['storeA' if assume_aligned else 'storeU']
vectorize_inner_loops_and_adapt_load_stores(kernel_ast, assume_aligned, nontemporal,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment