Small changes

39f90c5b · Markus Holzer · 15fea042 · 39f90c5b
Commit 39f90c5b authored 1 year ago by Markus Holzer
--- a/pystencils/cpu/vectorization.py
+++ b/pystencils/cpu/vectorization.py
@@ -140,8 +140,7 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem
                                                strided, keep_loop_stop, assume_sufficient_line_padding,
                                                default_float_type):
    """Goes over all innermost loops, changes increment to vector width and replaces field accesses by vector type."""
-    loop_generator = filtered_tree_iteration(ast_node, ast.LoopOverCoordinate, stop_type=ast.SympyAssignment)
+    all_loops = list(filtered_tree_iteration(ast_node, ast.LoopOverCoordinate, stop_type=ast.SympyAssignment))
-    all_loops = [loop for loop in loop_generator]
    inner_loops = [loop for loop in all_loops if loop.is_innermost_loop]
    zero_loop_counters = {loop.loop_counter_symbol: 0 for loop in all_loops}
@@ -179,12 +178,14 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem
            base, index = indexed.args
            if loop_counter_symbol in index.atoms(sp.Symbol):
                if not isinstance(vector_width, int):
-                    error_message = "NotImplemented: For instruction sets with variable vector size the aligned and " \
+                    # When the vector width is not known during code generation, we cannot determine whether
-                                    "unaligned load and strore instructions must match."
+                    # the access is aligned or not. None of the current sizeless vector ISAs (SVE and RISC-V-V)
-                    assert ast_node.instruction_set['loadA'] == ast_node.instruction_set['loadU'], error_message
+                    # have separate load/store instructions for aligned and unaligned, so there is no disadvantage
-                    assert ast_node.instruction_set['storeU'] == ast_node.instruction_set['storeA'], error_message
+                    # to falling back to unaligned here. When new ISAs become available, this may need to be revisited.
+                    aligned_access = False
+                else:
+                    aligned_access = (index - loop_counter_symbol).subs(zero_loop_counters) % vector_width == 0
                loop_counter_is_offset = loop_counter_symbol not in (index - loop_counter_symbol).atoms()
-                aligned_access = (index - loop_counter_symbol).subs(zero_loop_counters) % vector_width == 0
                stride = sp.simplify(index.subs({loop_counter_symbol: loop_counter_symbol + 1}) - index)
                if not loop_counter_is_offset and (not strided or loop_counter_symbol in stride.atoms()):
                    successful = False