diff --git a/pystencils/kerncraft_coupling/generate_benchmark.py b/pystencils/kerncraft_coupling/generate_benchmark.py
index 3dd36b22546d252770780952067928624f5a5da1..0990e7f6e7cc845f3c23569128882ec6304ba662 100644
--- a/pystencils/kerncraft_coupling/generate_benchmark.py
+++ b/pystencils/kerncraft_coupling/generate_benchmark.py
@@ -42,16 +42,27 @@ def generate_benchmark(ast, likwid=False, openmp=False, timing=False):
             dtype = str(get_base_type(p.symbol.dtype))
             np_dtype = np.dtype(dtype)
 
+            # Extent of the last axis and the product of all remaining extents.
+            dim0_size = field.shape[-1]
+            dim1_size = np.prod(field.shape[:-1])
+
             size_data_type = np_dtype.itemsize
             elements = prod(field.shape)
-            align = 64
-            required_size = size_data_type * elements + align
-            size = modulo_ceil(required_size, align)
+            # Alignment in bytes: 'width' elements of this field's data type.
+            align = ast.instruction_set['width'] * size_data_type
+            # Elements needed to round dim0_size up to the next multiple of 'width'.
+            padding_elements = (-dim0_size) % ast.instruction_set['width']
+            padding_bytes = padding_elements * size_data_type
+            ghost_layers = max(max(ast.ghost_layers))
+
+            size = dim1_size * padding_bytes + elements * size_data_type
 
             assert align % np_dtype.itemsize == 0
-            offset = int(-ast.ghost_layers[0][0] % (align / np_dtype.itemsize))
+            # Byte offset for aligned_malloc_with_offset, which returns a pointer
+            # such that (pointer + offset) is a multiple of 'align'.
+            offset = ((dim0_size + padding_elements + ghost_layers) % ast.instruction_set['width']) * size_data_type
 
-            fields.append((p.field_name, dtype, elements, size, offset))
+            fields.append((p.field_name, dtype, elements, size, offset, align))
             call_parameters.append(p.field_name)
 
     header_list = get_headers(ast)
diff --git a/pystencils/kerncraft_coupling/templates/benchmark.c b/pystencils/kerncraft_coupling/templates/benchmark.c
index 0539b501ccd554329db48550473f24d1bb555353..ea3e9c1d47f1bb42cd8db63e0e90e91a7f0c1c5e 100644
--- a/pystencils/kerncraft_coupling/templates/benchmark.c
+++ b/pystencils/kerncraft_coupling/templates/benchmark.c
@@ -5,6 +5,7 @@
 #include <stdbool.h>
 #include <math.h>
 #include <stdio.h>
+#include <assert.h>
 
 {{ includes }}
 
@@ -18,6 +19,43 @@ void dummy(void *);
 void timing(double* wcTime, double* cpuTime);
 extern int var_false;
 
+/* see waLBerla src/field/allocation/AlignedMalloc */
+void *aligned_malloc_with_offset( uint64_t size, uint64_t alignment, uint64_t offset )
+{
+    // With 0 alignment this function makes no sense
+    // use normal malloc instead
+    assert( alignment > 0 );
+    // Tests if alignment is power of two (assuming alignment>0)
+    assert( !(alignment & (alignment - 1)) );
+    assert( offset < alignment );
+
+    void *pa;  // pointer to allocated memory
+    void *ptr; // pointer to usable aligned memory
+
+    pa=malloc( (size+2*alignment-1 )+sizeof(void *));
+    if(!pa)
+        return NULL;
+
+    // Find next aligned position, starting at pa+sizeof(void*)-1
+    ptr=(void*)( ((size_t)pa+sizeof(void *)+alignment-1) & ~(alignment-1));
+    ptr=(void*) ( (char*)(ptr) + alignment - offset);
+
+    // Store pointer to real allocated chunk just before usable chunk
+    *((void **)ptr-1)=pa;
+
+    assert( ((size_t)ptr+offset) % alignment == 0 );
+
+    return ptr;
+}
+
+void aligned_free( void *ptr )
+{
+    // assume that pointer to real allocated chunk is stored just before
+    // chunk that was given to user
+    if(ptr)
+        free(*((void **)ptr-1));
+}
+
 
 {{kernel_code}}
 
@@ -28,10 +66,10 @@ int main(int argc, char **argv)
   likwid_markerInit();
   {%- endif %}
 
-  {%- for field_name, dataType, elements, size, offset in fields %}
+  {%- for field_name, dataType, elements, size, offset, alignment in fields %}
 
   // Initialization {{field_name}}
-  double * {{field_name}} = (double *) aligned_alloc(64, {{size}}) + {{offset}};
+  double * {{field_name}} = (double *) aligned_malloc_with_offset({{size}}, {{alignment}}, {{offset}});
   for (unsigned long long i = 0; i < {{elements}}; ++i)
     {{field_name}}[i] = 0.23;
 
@@ -80,7 +118,7 @@ int main(int argc, char **argv)
   {{kernelName}}({{call_argument_list}});
 
   // Dummy calls
-  {%- for field_name, dataType, elements, size, offset in fields %}
+  {%- for field_name, dataType, elements, size, offset, alignment in fields %}
   if(var_false) dummy((void*){{field_name}});
   {%- endfor %}
   {%- for constantName, dataType in constants %}
@@ -106,7 +144,7 @@ int main(int argc, char **argv)
   likwid_markerClose();
   {%- endif %}
 
-  {%- for field_name, dataType, elements, size, offset in fields %}
-  free({{field_name}} - {{offset}});
+  {%- for field_name, dataType, elements, size, offset, alignment in fields %}
+  aligned_free({{field_name}});
   {%- endfor %}
 }