diff --git a/pystencils/cpu/cpujit.py b/pystencils/cpu/cpujit.py index d2966f8d4592d260f4a29df70fded769d8c06e6e..51538fa204b6f35dbc4c896b9f04aeb1633ebe48 100644 --- a/pystencils/cpu/cpujit.py +++ b/pystencils/cpu/cpujit.py @@ -59,6 +59,7 @@ from appdirs import user_cache_dir, user_config_dir from pystencils import FieldType from pystencils.backends.cbackend import generate_c, get_headers +from pystencils.data_types import cast_func, VectorType from pystencils.include import get_pystencils_include_path from pystencils.kernel_wrapper import KernelWrapper from pystencils.utils import atomic_file_write, file_handle_for_atomic_write, recursive_dict_update @@ -374,14 +375,17 @@ def create_function_boilerplate_code(parameter_info, name, ast_node, insert_chec np_dtype = field.dtype.numpy_dtype item_size = np_dtype.itemsize - if ast_node.instruction_set: + aligned = any([a.lhs.args[2] for a in ast_node.assignments if + isinstance(a.lhs, cast_func) and isinstance(a.lhs.dtype, VectorType)]) + + if ast_node.instruction_set and aligned: byte_width = ast_node.instruction_set['width'] * item_size offset = max(max(ast_node.ghost_layers)) * item_size offset_cond = f"(((uintptr_t) buffer_{field.name}.buf) + {offset}) % {byte_width} == 0" message = str(offset) + ". This is probably due to a different number of ghost_layers chosen for " \ - "the arrays and the kernel creation. If the number of ghost layers for the " \ - "kernel creation is not specified it will choose a suitable value " \ + "the arrays and the kernel creation. If the number of ghost layers for " \ + "the kernel creation is not specified it will choose a suitable value " \ "automatically. This value might not " \ "be compatible with the allocated arrays." pre_call_code += template_check_array.format(cond=offset_cond, what="offset", name=field.name, @@ -580,7 +584,7 @@ def compile_and_load(ast, custom_backend=None): generated_code = generate_c(ast, dialect='c', custom_backend=custom_backend) fields_accessed = str(ast.fields_accessed) - # Also die Information of the field size should be contained in the hash string. Due to padding the generated code + # Also the information of the field size should be contained in the hash string. Due to padding the generated code # can look similar for different field sizes. code_hash_str = "mod_" + hashlib.sha256((generated_code + fields_accessed).encode()).hexdigest() code = ExtensionModuleCode(module_name=code_hash_str, custom_backend=custom_backend)