Skip to content
Snippets Groups Projects
Commit 88c1b63a authored by Markus Holzer's avatar Markus Holzer
Browse files

New try for alignment

parent e1f5683e
No related branches found
No related tags found
1 merge request!210WIP: Assembly
......@@ -42,16 +42,22 @@ def generate_benchmark(ast, likwid=False, openmp=False, timing=False):
dtype = str(get_base_type(p.symbol.dtype))
np_dtype = np.dtype(dtype)
dim0_size = field.shape[-1]
dim1_size = np.prod(field.shape[:-1])
size_data_type = np_dtype.itemsize
elements = prod(field.shape)
align = 64
required_size = size_data_type * elements + align
size = modulo_ceil(required_size, align)
align = ast.instruction_set['width'] * size_data_type
padding_elements = dim0_size % ast.instruction_set['width']
padding_bytes = padding_elements * size_data_type
ghost_layers = max(max(ast.ghost_layers))
size = dim1_size * padding_bytes + np.prod(field.shape) * size_data_type
assert align % np_dtype.itemsize == 0
offset = int(-ast.ghost_layers[0][0] % (align / np_dtype.itemsize))
offset = ((dim0_size + padding_elements + ghost_layers) % ast.instruction_set['width']) * size_data_type
fields.append((p.field_name, dtype, elements, size, offset))
fields.append((p.field_name, dtype, elements, size, offset, align))
call_parameters.append(p.field_name)
header_list = get_headers(ast)
......
......@@ -5,6 +5,7 @@
#include <stdbool.h>
#include <math.h>
#include <stdio.h>
#include <assert.h>
{{ includes }}
......@@ -18,6 +19,43 @@ void dummy(void *);
void timing(double* wcTime, double* cpuTime);
extern int var_false;
/* see waLBerla src/field/allocation/AlignedMalloc */
/*
 * Allocates a buffer of at least `size` usable bytes whose address `p`
 * satisfies (p + offset) % alignment == 0.
 *
 * size      - number of usable bytes requested
 * alignment - required alignment in bytes; must be a non-zero power of two
 * offset    - byte offset (must be < alignment) at which the aligned address
 *             shall lie inside the returned buffer
 *
 * Returns the usable pointer, or NULL if the underlying malloc fails or the
 * total request size is not representable. The pointer returned by malloc is
 * stored in the sizeof(void *) bytes directly in front of the returned
 * address, so the buffer must be released with aligned_free, never with
 * plain free.
 */
void *aligned_malloc_with_offset( uint64_t size, uint64_t alignment, uint64_t offset )
{
    // With 0 alignment this function makes no sense
    // use normal malloc instead
    assert( alignment > 0 );
    // Tests if alignment is power of two (assuming alignment>0)
    assert( !(alignment & (alignment - 1)) );
    assert( offset < alignment );

    // Refuse requests whose bookkeeping overhead or total size would wrap
    // around; a wrapped size would yield a buffer that is far too small.
    if( alignment > (UINT64_MAX - sizeof(void *)) / 2 )
        return NULL;
    // Up to alignment-1 bytes to reach an aligned address, up to alignment
    // bytes for the offset shift, plus room for the back-pointer.
    const uint64_t overhead = 2 * alignment - 1 + sizeof(void *);
    if( size > UINT64_MAX - overhead || size + overhead > SIZE_MAX )
        return NULL;

    void *pa = malloc( (size_t)(size + overhead) ); // pointer to allocated memory
    if( !pa )
        return NULL;

    // Find next aligned position that leaves room for the back-pointer
    const uint64_t aligned = ((uint64_t)(uintptr_t)pa + sizeof(void *) + alignment - 1) & ~(alignment - 1);

    // Shift so that ptr + offset (not ptr itself) is the aligned address
    void *ptr = (void *)(uintptr_t)(aligned + alignment - offset); // pointer to usable memory

    // Store pointer to real allocated chunk just before usable chunk.
    // NOTE(review): this slot is only pointer-aligned when offset is a
    // multiple of sizeof(void *).
    *((void **)ptr - 1) = pa;

    assert( ((uintptr_t)ptr + offset) % alignment == 0 );
    return ptr;
}
/*
 * Releases a buffer obtained from aligned_malloc_with_offset.
 *
 * ptr - pointer previously returned by aligned_malloc_with_offset, or NULL
 *       (in which case nothing happens)
 */
void aligned_free( void *ptr )
{
    // assume that pointer to real allocated chunk is stored just before
    // chunk that was given to user
    if( ptr )
        free( *((void **)ptr - 1) );
}
{{kernel_code}}
......@@ -28,10 +66,10 @@ int main(int argc, char **argv)
likwid_markerInit();
{%- endif %}
{%- for field_name, dataType, elements, size, offset in fields %}
{%- for field_name, dataType, elements, size, offset, alignment in fields %}
// Initialization {{field_name}}
double * {{field_name}} = (double *) aligned_alloc(64, {{size}}) + {{offset}};
double * {{field_name}} = (double *) aligned_malloc_with_offset({{size}}, {{alignment}}, {{offset}});
for (unsigned long long i = 0; i < {{elements}}; ++i)
{{field_name}}[i] = 0.23;
......@@ -80,7 +118,7 @@ int main(int argc, char **argv)
{{kernelName}}({{call_argument_list}});
// Dummy calls
{%- for field_name, dataType, elements, size, offset in fields %}
{%- for field_name, dataType, elements, size, offset, alignment in fields %}
if(var_false) dummy((void*){{field_name}});
{%- endfor %}
{%- for constantName, dataType in constants %}
......@@ -106,7 +144,7 @@ int main(int argc, char **argv)
likwid_markerClose();
{%- endif %}
{%- for field_name, dataType, elements, size, offset in fields %}
free({{field_name}} - {{offset}});
{%- for field_name, dataType, elements, size, offset, alignment in fields %}
aligned_free({{field_name}});
{%- endfor %}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment