From 1800727502221dd895dec6b9ee09eae15ca0d781 Mon Sep 17 00:00:00 2001
From: Markus Holzer <markus.holzer@fau.de>
Date: Mon, 18 Sep 2023 09:01:50 +0200
Subject: [PATCH] Fix vectorization when loadA and loadU instructions are identical

---
 pystencils/cpu/vectorization.py                 |  5 ++++-
 pystencils_tests/test_vectorization_specific.py | 11 +++++++----
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/pystencils/cpu/vectorization.py b/pystencils/cpu/vectorization.py
index 265bdeaea..7c377944e 100644
--- a/pystencils/cpu/vectorization.py
+++ b/pystencils/cpu/vectorization.py
@@ -149,6 +149,9 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem
     vector_width = ast_node.instruction_set['width']
     vector_int_width = ast_node.instruction_set['intwidth']
 
+    load_a = ast_node.instruction_set['loadA']
+    load_u = ast_node.instruction_set['loadU']
+
     for loop_node in inner_loops:
         loop_range = loop_node.stop - loop_node.start
 
@@ -177,7 +180,7 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem
         for indexed in loop_node.atoms(sp.Indexed):
             base, index = indexed.args
             if loop_counter_symbol in index.atoms(sp.Symbol):
-                if not isinstance(vector_width, int):
+                if not isinstance(vector_width, int) or load_a == load_u:
                     # When the vector width is not known during code generation, we cannot determine whether
                     # the access is aligned or not. None of the current sizeless vector ISAs (SVE and RISC-V-V)
                     # have separate load/store instructions for aligned and unaligned, so there is no disadvantage
diff --git a/pystencils_tests/test_vectorization_specific.py b/pystencils_tests/test_vectorization_specific.py
index 46e13c2d7..db4965755 100644
--- a/pystencils_tests/test_vectorization_specific.py
+++ b/pystencils_tests/test_vectorization_specific.py
@@ -119,11 +119,14 @@ def test_alignment_and_correct_ghost_layers(gl_field, gl_kernel, instruction_set
                                                   cpu_vectorize_info=opt, ghost_layers=gl_kernel)
     ast = ps.create_kernel(update_rule, config=config)
     kernel = ast.compile()
-    if gl_kernel != gl_field:
-        with pytest.raises(ValueError):
-            dh.run_kernel(kernel)
-    else:
+    if ast.instruction_set['loadA'] == ast.instruction_set['loadU']:
         dh.run_kernel(kernel)
+    else:
+        if gl_kernel != gl_field:
+            with pytest.raises(ValueError):
+                dh.run_kernel(kernel)
+        else:
+            dh.run_kernel(kernel)
 
 
 @pytest.mark.parametrize('instruction_set', supported_instruction_sets)
-- 
GitLab