Skip to content
Snippets Groups Projects
Commit f9a17154 authored by Michael Kuron
Browse files

never attempt to vectorize the tail loop

parent 4f1a99ce
No related branches found
No related tags found
1 merge request: !248 "Fix RNG vectorization for LB"
...@@ -192,7 +192,9 @@ class CBackend: ...@@ -192,7 +192,9 @@ class CBackend:
def __init__(self, sympy_printer=None, signature_only=False, vector_instruction_set=None, dialect='c'): def __init__(self, sympy_printer=None, signature_only=False, vector_instruction_set=None, dialect='c'):
if sympy_printer is None: if sympy_printer is None:
if vector_instruction_set is not None: if vector_instruction_set is not None:
self.sympy_printer = VectorizedCustomSympyPrinter(vector_instruction_set) self.vector_sympy_printer = VectorizedCustomSympyPrinter(vector_instruction_set)
self.scalar_sympy_printer = CustomSympyPrinter()
self.sympy_printer = self.vector_sympy_printer
else: else:
self.sympy_printer = CustomSympyPrinter() self.sympy_printer = CustomSympyPrinter()
else: else:
...@@ -259,6 +261,12 @@ class CBackend: ...@@ -259,6 +261,12 @@ class CBackend:
prefix = "\n".join(node.prefix_lines) prefix = "\n".join(node.prefix_lines)
if prefix: if prefix:
prefix += "\n" prefix += "\n"
if self._vector_instruction_set and hasattr(node, 'instruction_set') and node.instruction_set is None:
# the tail loop must not be vectorized
self.sympy_printer = self.scalar_sympy_printer
code = f"{prefix}{loop_str}\n{self._print(node.body)}"
self.sympy_printer = self.vector_sympy_printer
return code
return f"{prefix}{loop_str}\n{self._print(node.body)}" return f"{prefix}{loop_str}\n{self._print(node.body)}"
def _print_SympyAssignment(self, node): def _print_SympyAssignment(self, node):
......
...@@ -173,6 +173,8 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, vector_width, assume_a ...@@ -173,6 +173,8 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, vector_width, assume_a
cutting_point = modulo_floor(loop_range, vector_width) + loop_node.start cutting_point = modulo_floor(loop_range, vector_width) + loop_node.start
loop_nodes = [l for l in cut_loop(loop_node, [cutting_point]).args if isinstance(l, ast.LoopOverCoordinate)] loop_nodes = [l for l in cut_loop(loop_node, [cutting_point]).args if isinstance(l, ast.LoopOverCoordinate)]
assert len(loop_nodes) in (0, 1, 2) # 2 for main and tail loop, 1 if loop range divisible by vector width assert len(loop_nodes) in (0, 1, 2) # 2 for main and tail loop, 1 if loop range divisible by vector width
if len(loop_nodes) == 2:
loop_nodes[1].instruction_set = None
if len(loop_nodes) == 0: if len(loop_nodes) == 0:
continue continue
loop_node = loop_nodes[0] loop_node = loop_nodes[0]
...@@ -322,6 +324,9 @@ def insert_vector_casts(ast_node): ...@@ -322,6 +324,9 @@ def insert_vector_casts(ast_node):
return expr return expr
def visit_node(node, substitution_dict): def visit_node(node, substitution_dict):
if hasattr(node, 'instruction_set') and node.instruction_set is None:
# the tail loop must not be vectorized
return
substitution_dict = substitution_dict.copy() substitution_dict = substitution_dict.copy()
for arg in node.args: for arg in node.args:
if isinstance(arg, ast.SympyAssignment): if isinstance(arg, ast.SympyAssignment):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment