diff --git a/pystencils/kerncraft_coupling/generate_benchmark.py b/pystencils/kerncraft_coupling/generate_benchmark.py
index 3dd36b22546d252770780952067928624f5a5da1..0990e7f6e7cc845f3c23569128882ec6304ba662 100644
--- a/pystencils/kerncraft_coupling/generate_benchmark.py
+++ b/pystencils/kerncraft_coupling/generate_benchmark.py
@@ -42,16 +42,22 @@ def generate_benchmark(ast, likwid=False, openmp=False, timing=False):
             dtype = str(get_base_type(p.symbol.dtype))
             np_dtype = np.dtype(dtype)
 
+            dim0_size = field.shape[-1]
+            dim1_size = np.prod(field.shape[:-1])
+
             size_data_type = np_dtype.itemsize
             elements = prod(field.shape)
-            align = 64
-            required_size = size_data_type * elements + align
-            size = modulo_ceil(required_size, align)
+            align = ast.instruction_set['width'] * size_data_type
+            padding_elements = dim0_size % ast.instruction_set['width']
+            padding_bytes = padding_elements * size_data_type
+            ghost_layers = max(max(ast.ghost_layers))
+
+            size = dim1_size * padding_bytes + np.prod(field.shape) * size_data_type
 
             assert align % np_dtype.itemsize == 0
-            offset = int(-ast.ghost_layers[0][0] % (align / np_dtype.itemsize))
+            offset = ((dim0_size + padding_elements + ghost_layers) % ast.instruction_set['width']) * size_data_type
 
-            fields.append((p.field_name, dtype, elements, size, offset))
+            fields.append((p.field_name, dtype, elements, size, offset, align))
             call_parameters.append(p.field_name)
 
     header_list = get_headers(ast)
diff --git a/pystencils/kerncraft_coupling/templates/benchmark.c b/pystencils/kerncraft_coupling/templates/benchmark.c
index 0539b501ccd554329db48550473f24d1bb555353..ea3e9c1d47f1bb42cd8db63e0e90e91a7f0c1c5e 100644
--- a/pystencils/kerncraft_coupling/templates/benchmark.c
+++ b/pystencils/kerncraft_coupling/templates/benchmark.c
@@ -5,6 +5,7 @@
 #include <stdbool.h>
 #include <math.h>
 #include <stdio.h>
+#include <assert.h>
 
 {{ includes }}
 
@@ -18,6 +19,43 @@ void dummy(void *);
 void timing(double* wcTime, double* cpuTime);
 extern int var_false;
 
+/* Allocate `size` bytes such that (ptr + offset) is a multiple of `alignment`; returns NULL on failure.
+ * Release the result only with aligned_free(). See waLBerla src/field/allocation/AlignedMalloc. */
+void *aligned_malloc_with_offset( uint64_t size, uint64_t alignment, uint64_t offset )
+{
+    // alignment must be a non-zero power of two and offset must lie within one alignment period
+    assert( alignment > 0 );
+    assert( !(alignment & (alignment - 1)) );
+    assert( offset < alignment );
+
+    void *pa;  // pointer to allocated memory
+    void *ptr; // pointer to usable aligned memory
+
+    // Plain malloc/NULL keep this .c template valid C (std::malloc and nullptr are C++ only).
+    // Extra room: up to 2*alignment-1 bytes of shifting plus one slot for the bookkeeping pointer.
+    pa=malloc( (size+2*alignment-1 )+sizeof(void *));
+    if(!pa)
+        return NULL;
+
+    // Round pa+sizeof(void*) up to an aligned address, then advance so that ptr+offset is aligned
+    ptr=(void*)( ((size_t)pa+sizeof(void *)+alignment-1) & ~(alignment-1));
+    ptr=(void*) ( (char*)(ptr) + alignment - offset);
+
+    // Store pointer to real allocated chunk just before usable chunk
+    *((void **)ptr-1)=pa;
+
+    assert( ((size_t)ptr+offset) % alignment == 0 );
+    return ptr;
+}
+
+void aligned_free( void *ptr )
+{
+    // Frees the real allocation whose address is stored in the pointer-sized slot just before
+    // ptr; NULL is ignored. Uses plain free() so this .c template stays valid C (no std::free).
+    if(ptr)
+        free(*((void **)ptr-1));
+}
+
 
 {{kernel_code}}
 
@@ -28,10 +66,10 @@ int main(int argc, char **argv)
   likwid_markerInit();
   {%- endif %}
 
-  {%- for field_name, dataType, elements, size, offset in fields %}
+  {%- for field_name, dataType, elements, size, offset, alignment in fields %}
 
   // Initialization {{field_name}}
-  double * {{field_name}} = (double *) aligned_alloc(64, {{size}}) + {{offset}};
+  double * {{field_name}} = (double *) aligned_malloc_with_offset({{size}}, {{alignment}}, {{offset}});
   for (unsigned long long i = 0; i < {{elements}}; ++i)
     {{field_name}}[i] = 0.23;
 
@@ -80,7 +118,7 @@ int main(int argc, char **argv)
       {{kernelName}}({{call_argument_list}});
 
       // Dummy calls
-      {%- for field_name, dataType, elements, size, offset in fields %}
+      {%- for field_name, dataType, elements, size, offset, alignment in fields %}
       if(var_false) dummy((void*){{field_name}});
       {%- endfor %}
       {%- for constantName, dataType in constants %}
@@ -106,7 +144,7 @@ int main(int argc, char **argv)
   likwid_markerClose();
   {%- endif %}
 
-  {%- for field_name, dataType, elements, size, offset in fields %}
-  free({{field_name}} - {{offset}});
+  {%- for field_name, dataType, elements, size, offset, alignment in fields %}
+  aligned_free({{field_name}});
   {%- endfor %}
 }