From 39f90c5b7e5bc56484357e9c2431f10233cf7d37 Mon Sep 17 00:00:00 2001 From: Markus Holzer <markus.holzer@fau.de> Date: Fri, 15 Sep 2023 09:18:55 +0200 Subject: [PATCH] Small changes --- pystencils/cpu/vectorization.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pystencils/cpu/vectorization.py b/pystencils/cpu/vectorization.py index 5903b13e8..265bdeaea 100644 --- a/pystencils/cpu/vectorization.py +++ b/pystencils/cpu/vectorization.py @@ -140,8 +140,7 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem strided, keep_loop_stop, assume_sufficient_line_padding, default_float_type): """Goes over all innermost loops, changes increment to vector width and replaces field accesses by vector type.""" - loop_generator = filtered_tree_iteration(ast_node, ast.LoopOverCoordinate, stop_type=ast.SympyAssignment) - all_loops = [loop for loop in loop_generator] + all_loops = list(filtered_tree_iteration(ast_node, ast.LoopOverCoordinate, stop_type=ast.SympyAssignment)) inner_loops = [loop for loop in all_loops if loop.is_innermost_loop] zero_loop_counters = {loop.loop_counter_symbol: 0 for loop in all_loops} @@ -179,12 +178,14 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem base, index = indexed.args if loop_counter_symbol in index.atoms(sp.Symbol): if not isinstance(vector_width, int): - error_message = "NotImplemented: For instruction sets with variable vector size the aligned and " \ - "unaligned load and strore instructions must match." - assert ast_node.instruction_set['loadA'] == ast_node.instruction_set['loadU'], error_message - assert ast_node.instruction_set['storeU'] == ast_node.instruction_set['storeA'], error_message + # When the vector width is not known during code generation, we cannot determine whether + # the access is aligned or not. None of the current sizeless vector ISAs (SVE and RISC-V-V) + # have separate load/store instructions for aligned and unaligned, so there is no disadvantage + # to falling back to unaligned here. When new ISAs become available, this may need to be revisited. + aligned_access = False + else: + aligned_access = (index - loop_counter_symbol).subs(zero_loop_counters) % vector_width == 0 loop_counter_is_offset = loop_counter_symbol not in (index - loop_counter_symbol).atoms() - aligned_access = (index - loop_counter_symbol).subs(zero_loop_counters) % vector_width == 0 stride = sp.simplify(index.subs({loop_counter_symbol: loop_counter_symbol + 1}) - index) if not loop_counter_is_offset and (not strided or loop_counter_symbol in stride.atoms()): successful = False -- GitLab