diff --git a/pystencils/cpu/cpujit.py b/pystencils/cpu/cpujit.py index 5001c5e02585fce417bc3431eb0e0f2275a975fd..d2966f8d4592d260f4a29df70fded769d8c06e6e 100644 --- a/pystencils/cpu/cpujit.py +++ b/pystencils/cpu/cpujit.py @@ -356,7 +356,7 @@ def equal_size_check(fields): return template_size_check.format(cond=cond) -def create_function_boilerplate_code(parameter_info, name, insert_checks=True): +def create_function_boilerplate_code(parameter_info, name, ast_node, insert_checks=True): pre_call_code = "" parameters = [] post_call_code = "" @@ -374,6 +374,19 @@ def create_function_boilerplate_code(parameter_info, name, insert_checks=True): np_dtype = field.dtype.numpy_dtype item_size = np_dtype.itemsize + if ast_node.instruction_set: + byte_width = ast_node.instruction_set['width'] * item_size + offset = max(max(ast_node.ghost_layers)) * item_size + offset_cond = f"(((uintptr_t) buffer_{field.name}.buf) + {offset}) % {byte_width} == 0" + + message = str(offset) + ". This is probably due to a different number of ghost_layers chosen for " \ + "the arrays and the kernel creation. If the number of ghost layers for the " \ + "kernel creation is not specified it will choose a suitable value " \ + "automatically. This value might not " \ + "be compatible with the allocated arrays." + pre_call_code += template_check_array.format(cond=offset_cond, what="offset", name=field.name, + expected=message) + if (np_dtype.isbuiltin and FieldType.is_generic(field) and not np.issubdtype(field.dtype.numpy_dtype, np.complexfloating)): dtype_cond = "buffer_{name}.format[0] == '{format}'".format(name=field.name, @@ -504,7 +517,7 @@ class ExtensionModuleCode: old_name = ast.function_name ast.function_name = "kernel_" + name print(generate_c(ast, custom_backend=self._custom_backend), file=file) - print(create_function_boilerplate_code(ast.get_parameters(), name), file=file) + print(create_function_boilerplate_code(ast.get_parameters(), name, ast), file=file) ast.function_name = old_name print(create_module_boilerplate_code(self.module_name, self._function_names), file=file)