Skip to content
Snippets Groups Projects
Commit 9d9919ff authored by Michael Kuron 🎓
Browse files

Merge branch 'Regression' into 'master'

Fix Regression from !300

See merge request !303
parents b1308c07 082e5c2b
No related branches found
No related tags found
No related merge requests found
...@@ -193,6 +193,10 @@ class KernelFunction(Node): ...@@ -193,6 +193,10 @@ class KernelFunction(Node):
# function that compiles the node to a Python callable, is set by the backends # function that compiles the node to a Python callable, is set by the backends
self._compile_function = compile_function self._compile_function = compile_function
self.assignments = assignments self.assignments = assignments
# If nontemporal stores are activated together with the Neon instruction set, this results in cacheline zeroing.
# Cacheline zeroing needs the field size of each field. Thus, in this case,
# all field sizes are kernel parameters, not just the common field size used for the loops.
self.use_all_written_field_sizes = False
@property @property
def target(self): def target(self):
...@@ -233,7 +237,8 @@ class KernelFunction(Node): ...@@ -233,7 +237,8 @@ class KernelFunction(Node):
@property @property
def fields_written(self) -> Set[Field]: def fields_written(self) -> Set[Field]:
assignments = self.atoms(SympyAssignment) assignments = self.atoms(SympyAssignment)
return {a.lhs.field for a in assignments if isinstance(a.lhs, ResolvedFieldAccess)} return set().union(itertools.chain.from_iterable([f.field for f in a.lhs.free_symbols if hasattr(f, 'field')]
for a in assignments))
@property @property
def fields_read(self) -> Set[Field]: def fields_read(self) -> Set[Field]:
...@@ -247,6 +252,11 @@ class KernelFunction(Node): ...@@ -247,6 +252,11 @@ class KernelFunction(Node):
This function is expensive, cache the result where possible! This function is expensive, cache the result where possible!
""" """
field_map = {f.name: f for f in self.fields_accessed} field_map = {f.name: f for f in self.fields_accessed}
sizes = set()
if self.use_all_written_field_sizes:
sizes = set().union(*(a.shape[:a.spatial_dimensions] for a in self.fields_written))
sizes = filter(lambda s: isinstance(s, FieldShapeSymbol), sizes)
def get_fields(symbol): def get_fields(symbol):
if hasattr(symbol, 'field_name'): if hasattr(symbol, 'field_name'):
...@@ -256,6 +266,7 @@ class KernelFunction(Node): ...@@ -256,6 +266,7 @@ class KernelFunction(Node):
return () return ()
argument_symbols = self._body.undefined_symbols - self.global_variables argument_symbols = self._body.undefined_symbols - self.global_variables
argument_symbols.update(sizes)
parameters = [self.Parameter(symbol, get_fields(symbol)) for symbol in argument_symbols] parameters = [self.Parameter(symbol, get_fields(symbol)) for symbol in argument_symbols]
if hasattr(self, 'indexing'): if hasattr(self, 'indexing'):
parameters += [self.Parameter(s, []) for s in self.indexing.symbolic_parameters()] parameters += [self.Parameter(s, []) for s in self.indexing.symbolic_parameters()]
...@@ -622,11 +633,6 @@ class SympyAssignment(Node): ...@@ -622,11 +633,6 @@ class SympyAssignment(Node):
result.update(self._lhs_symbol.atoms(sp.Symbol)) result.update(self._lhs_symbol.atoms(sp.Symbol))
sizes = set().union(*(a.field.shape[:a.field.spatial_dimensions]
for a in self._lhs_symbol.atoms(ResolvedFieldAccess)))
sizes = filter(lambda s: isinstance(s, FieldShapeSymbol), sizes)
result.update(sizes)
return result return result
@property @property
......
...@@ -127,6 +127,8 @@ def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'best', ...@@ -127,6 +127,8 @@ def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'best',
vector_is = get_vector_instruction_set(default_float_type, instruction_set=instruction_set) vector_is = get_vector_instruction_set(default_float_type, instruction_set=instruction_set)
kernel_ast.instruction_set = vector_is kernel_ast.instruction_set = vector_is
if nontemporal and 'cachelineZero' in vector_is:
kernel_ast.use_all_written_field_sizes = True
strided = 'storeS' in vector_is and 'loadS' in vector_is strided = 'storeS' in vector_is and 'loadS' in vector_is
keep_loop_stop = '{loop_stop}' in vector_is['storeA' if assume_aligned else 'storeU'] keep_loop_stop = '{loop_stop}' in vector_is['storeA' if assume_aligned else 'storeU']
vectorize_inner_loops_and_adapt_load_stores(kernel_ast, assume_aligned, nontemporal, vectorize_inner_loops_and_adapt_load_stores(kernel_ast, assume_aligned, nontemporal,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment