From 1800727502221dd895dec6b9ee09eae15ca0d781 Mon Sep 17 00:00:00 2001 From: Markus Holzer <markus.holzer@fau.de> Date: Mon, 18 Sep 2023 09:01:50 +0200 Subject: [PATCH] Fix if loada and loadu are similar --- pystencils/cpu/vectorization.py | 5 ++++- pystencils_tests/test_vectorization_specific.py | 11 +++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/pystencils/cpu/vectorization.py b/pystencils/cpu/vectorization.py index 265bdeaea..7c377944e 100644 --- a/pystencils/cpu/vectorization.py +++ b/pystencils/cpu/vectorization.py @@ -149,6 +149,9 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem vector_width = ast_node.instruction_set['width'] vector_int_width = ast_node.instruction_set['intwidth'] + load_a = ast_node.instruction_set['loadA'] + load_u = ast_node.instruction_set['loadU'] + for loop_node in inner_loops: loop_range = loop_node.stop - loop_node.start @@ -177,7 +180,7 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem for indexed in loop_node.atoms(sp.Indexed): base, index = indexed.args if loop_counter_symbol in index.atoms(sp.Symbol): - if not isinstance(vector_width, int): + if not isinstance(vector_width, int) or load_a == load_u: # When the vector width is not known during code generation, we cannot determine whether # the access is aligned or not. None of the current sizeless vector ISAs (SVE and RISC-V-V) # have separate load/store instructions for aligned and unaligned, so there is no disadvantage diff --git a/pystencils_tests/test_vectorization_specific.py b/pystencils_tests/test_vectorization_specific.py index 46e13c2d7..db4965755 100644 --- a/pystencils_tests/test_vectorization_specific.py +++ b/pystencils_tests/test_vectorization_specific.py @@ -119,11 +119,14 @@ def test_alignment_and_correct_ghost_layers(gl_field, gl_kernel, instruction_set cpu_vectorize_info=opt, ghost_layers=gl_kernel) ast = ps.create_kernel(update_rule, config=config) kernel = ast.compile() - if gl_kernel != gl_field: - with pytest.raises(ValueError): - dh.run_kernel(kernel) - else: + if ast.instruction_set['loadA'] == ast.instruction_set['loadU']: dh.run_kernel(kernel) + else: + if gl_kernel != gl_field: + with pytest.raises(ValueError): + dh.run_kernel(kernel) + else: + dh.run_kernel(kernel) @pytest.mark.parametrize('instruction_set', supported_instruction_sets) -- GitLab