diff --git a/pystencils/cpu/vectorization.py b/pystencils/cpu/vectorization.py index 265bdeaea298d653e9785c0d642bf974df7c8164..7c377944ea3db2e1200cb515fe27d63b8cdd953c 100644 --- a/pystencils/cpu/vectorization.py +++ b/pystencils/cpu/vectorization.py @@ -149,6 +149,9 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem vector_width = ast_node.instruction_set['width'] vector_int_width = ast_node.instruction_set['intwidth'] + load_a = ast_node.instruction_set['loadA'] + load_u = ast_node.instruction_set['loadU'] + for loop_node in inner_loops: loop_range = loop_node.stop - loop_node.start @@ -177,7 +180,7 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem for indexed in loop_node.atoms(sp.Indexed): base, index = indexed.args if loop_counter_symbol in index.atoms(sp.Symbol): - if not isinstance(vector_width, int): + if not isinstance(vector_width, int) or load_a == load_u: # When the vector width is not known during code generation, we cannot determine whether # the access is aligned or not. None of the current sizeless vector ISAs (SVE and RISC-V-V) # have separate load/store instructions for aligned and unaligned, so there is no disadvantage diff --git a/pystencils_tests/test_vectorization_specific.py b/pystencils_tests/test_vectorization_specific.py index 46e13c2d7f59bfaa9fa50f5e3d8632da3c1a25ac..db49657557b9eea5fd6e05f51f69f2032ffd4e3a 100644 --- a/pystencils_tests/test_vectorization_specific.py +++ b/pystencils_tests/test_vectorization_specific.py @@ -119,11 +119,14 @@ def test_alignment_and_correct_ghost_layers(gl_field, gl_kernel, instruction_set cpu_vectorize_info=opt, ghost_layers=gl_kernel) ast = ps.create_kernel(update_rule, config=config) kernel = ast.compile() - if gl_kernel != gl_field: - with pytest.raises(ValueError): - dh.run_kernel(kernel) - else: + if ast.instruction_set['loadA'] == ast.instruction_set['loadU']: dh.run_kernel(kernel) + else: + if gl_kernel != gl_field: + with pytest.raises(ValueError): + dh.run_kernel(kernel) + else: + dh.run_kernel(kernel) @pytest.mark.parametrize('instruction_set', supported_instruction_sets)