Skip to content
Snippets Groups Projects
Commit 88c1b63a authored by Markus Holzer's avatar Markus Holzer
Browse files

New try for alignment

parent e1f5683e
No related branches found
No related tags found
No related merge requests found
Pipeline #30567 failed
...@@ -42,16 +42,22 @@ def generate_benchmark(ast, likwid=False, openmp=False, timing=False): ...@@ -42,16 +42,22 @@ def generate_benchmark(ast, likwid=False, openmp=False, timing=False):
dtype = str(get_base_type(p.symbol.dtype)) dtype = str(get_base_type(p.symbol.dtype))
np_dtype = np.dtype(dtype) np_dtype = np.dtype(dtype)
dim0_size = field.shape[-1]
dim1_size = np.prod(field.shape[:-1])
size_data_type = np_dtype.itemsize size_data_type = np_dtype.itemsize
elements = prod(field.shape) elements = prod(field.shape)
align = 64 align = ast.instruction_set['width'] * size_data_type
required_size = size_data_type * elements + align padding_elements = dim0_size % ast.instruction_set['width']
size = modulo_ceil(required_size, align) padding_bytes = padding_elements * size_data_type
ghost_layers = max(max(ast.ghost_layers))
size = dim1_size * padding_bytes + np.prod(field.shape) * size_data_type
assert align % np_dtype.itemsize == 0 assert align % np_dtype.itemsize == 0
offset = int(-ast.ghost_layers[0][0] % (align / np_dtype.itemsize)) offset = ((dim0_size + padding_elements + ghost_layers) % ast.instruction_set['width']) * size_data_type
fields.append((p.field_name, dtype, elements, size, offset)) fields.append((p.field_name, dtype, elements, size, offset, align))
call_parameters.append(p.field_name) call_parameters.append(p.field_name)
header_list = get_headers(ast) header_list = get_headers(ast)
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#include <stdbool.h> #include <stdbool.h>
#include <math.h> #include <math.h>
#include <stdio.h> #include <stdio.h>
#include <assert.h>
{{ includes }} {{ includes }}
...@@ -18,6 +19,43 @@ void dummy(void *); ...@@ -18,6 +19,43 @@ void dummy(void *);
void timing(double* wcTime, double* cpuTime); void timing(double* wcTime, double* cpuTime);
extern int var_false; extern int var_false;
/* see waLBerla src/field/allocation/AlignedMalloc */
/**
 * Allocates `size` usable bytes such that the address `offset` bytes past the
 * returned pointer is a multiple of `alignment`.
 *
 * The pointer obtained from malloc is stored in the sizeof(void *) bytes
 * directly in front of the returned pointer, so the block must be released
 * with aligned_free() and never with free().
 *
 * @param size       number of usable bytes requested
 * @param alignment  required alignment in bytes; must be a non-zero power of two
 * @param offset     byte offset from the returned pointer at which the
 *                   alignment has to hold; must be smaller than `alignment`
 * @return pointer to the usable memory, or NULL if the request is too large
 *         or malloc fails
 */
void *aligned_malloc_with_offset( uint64_t size, uint64_t alignment, uint64_t offset )
{
    // With 0 alignment this function makes no sense
    // use normal malloc instead
    assert( alignment > 0 );
    // Tests if alignment is power of two (assuming alignment>0)
    assert( !(alignment & (alignment - 1)) );
    assert( offset < alignment );

    // Refuse requests whose padded size would wrap around; otherwise malloc
    // would hand back a buffer that is smaller than what the caller asked for.
    if( alignment > SIZE_MAX / 4 || size > SIZE_MAX - 2 * alignment - sizeof(void *) )
        return NULL;

    // Over-allocate: room for the bookkeeping pointer, for rounding up to the
    // next aligned address, and for stepping back by `offset` from the
    // aligned address after that.
    void *pa = malloc( (size_t)(size + 2 * alignment - 1 + sizeof(void *)) );
    if( !pa )
        return NULL;

    // First aligned address at or after pa + sizeof(void *)
    uintptr_t aligned = ((uintptr_t)pa + sizeof(void *) + (uintptr_t)alignment - 1)
                        & ~((uintptr_t)alignment - 1);

    // Move forward so that ptr + offset is the following aligned address
    unsigned char *ptr = (unsigned char *)aligned + (alignment - offset);

    // Store pointer to real allocated chunk just before usable chunk.
    // Copied byte by byte because ptr - sizeof(void *) is not suitably
    // aligned for a void * when offset is not a multiple of sizeof(void *).
    const unsigned char *src = (const unsigned char *)&pa;
    unsigned char *slot = ptr - sizeof(void *);
    for( size_t i = 0; i < sizeof(void *); ++i )
        slot[i] = src[i];

    assert( ((uintptr_t)ptr + offset) % alignment == 0 );
    return ptr;
}
/**
 * Releases memory obtained from aligned_malloc_with_offset().
 *
 * Assumes that the pointer to the real allocated chunk is stored in the
 * sizeof(void *) bytes directly in front of `ptr`. Passing NULL is a no-op.
 *
 * @param ptr  pointer previously returned by aligned_malloc_with_offset(), or NULL
 */
void aligned_free( void *ptr )
{
    if( !ptr )
        return;

    // Read the stored pointer byte by byte: the slot in front of ptr is not
    // necessarily aligned for a void *, so it is not dereferenced as one.
    void *pa;
    unsigned char *dst = (unsigned char *)&pa;
    const unsigned char *slot = (const unsigned char *)ptr - sizeof(void *);
    for( size_t i = 0; i < sizeof(void *); ++i )
        dst[i] = slot[i];

    free( pa );
}
{{kernel_code}} {{kernel_code}}
...@@ -28,10 +66,10 @@ int main(int argc, char **argv) ...@@ -28,10 +66,10 @@ int main(int argc, char **argv)
likwid_markerInit(); likwid_markerInit();
{%- endif %} {%- endif %}
{%- for field_name, dataType, elements, size, offset in fields %} {%- for field_name, dataType, elements, size, offset, alignment in fields %}
// Initialization {{field_name}} // Initialization {{field_name}}
double * {{field_name}} = (double *) aligned_alloc(64, {{size}}) + {{offset}}; double * {{field_name}} = (double *) aligned_malloc_with_offset({{size}}, {{alignment}}, {{offset}});
for (unsigned long long i = 0; i < {{elements}}; ++i) for (unsigned long long i = 0; i < {{elements}}; ++i)
{{field_name}}[i] = 0.23; {{field_name}}[i] = 0.23;
...@@ -80,7 +118,7 @@ int main(int argc, char **argv) ...@@ -80,7 +118,7 @@ int main(int argc, char **argv)
{{kernelName}}({{call_argument_list}}); {{kernelName}}({{call_argument_list}});
// Dummy calls // Dummy calls
{%- for field_name, dataType, elements, size, offset in fields %} {%- for field_name, dataType, elements, size, offset, alignment in fields %}
if(var_false) dummy((void*){{field_name}}); if(var_false) dummy((void*){{field_name}});
{%- endfor %} {%- endfor %}
{%- for constantName, dataType in constants %} {%- for constantName, dataType in constants %}
...@@ -106,7 +144,7 @@ int main(int argc, char **argv) ...@@ -106,7 +144,7 @@ int main(int argc, char **argv)
likwid_markerClose(); likwid_markerClose();
{%- endif %} {%- endif %}
{%- for field_name, dataType, elements, size, offset in fields %} {%- for field_name, dataType, elements, size, offset, alignment in fields %}
free({{field_name}} - {{offset}}); aligned_free({{field_name}});
{%- endfor %} {%- endfor %}
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment