diff --git a/pystencils/kerncraft_coupling/generate_benchmark.py b/pystencils/kerncraft_coupling/generate_benchmark.py index 9a012d6c2a75c98faae05e6815dd3883c7d4d2e4..1938fc80528fc14531d2c7baf20e182439857039 100644 --- a/pystencils/kerncraft_coupling/generate_benchmark.py +++ b/pystencils/kerncraft_coupling/generate_benchmark.py @@ -10,8 +10,10 @@ from pystencils.backends.cbackend import generate_c, get_headers from pystencils.cpu.cpujit import get_compiler_config, run_compile_step from pystencils.data_types import get_base_type from pystencils.include import get_pystencils_include_path +from pystencils.integer_functions import modulo_ceil from pystencils.sympyextensions import prod +import numpy as np def generate_benchmark(ast, likwid=False, openmp=False, timing=False): """Return C code of a benchmark program for the given kernel. @@ -37,7 +39,18 @@ def generate_benchmark(ast, likwid=False, openmp=False, timing=False): assert p.is_field_pointer, "Benchmark implemented only for kernels with fixed loop size" field = accessed_fields[p.field_name] dtype = str(get_base_type(p.symbol.dtype)) - fields.append((p.field_name, dtype, prod(field.shape))) + np_dtype = np.dtype(dtype) + + size_data_type = np_dtype.itemsize + elements = prod(field.shape) + align = 64 + required_size = size_data_type * elements + align + size = modulo_ceil(required_size, align) + + assert align % np_dtype.itemsize == 0 + offset = int(-ast.ghost_layers[0][0] % (align / np_dtype.itemsize)) + + fields.append((p.field_name, dtype, elements, size, offset)) call_parameters.append(p.field_name) header_list = get_headers(ast) @@ -99,10 +112,10 @@ def run_c_benchmark(ast, inner_iterations, outer_iterations=3, path=None): compiler_config = get_compiler_config() compile_cmd = [compiler_config['command']] + compiler_config['flags'].split() compile_cmd += [*extra_flags, - kerncraft_path / 'headers' / 'timing.c', - kerncraft_path / 'headers' / 'dummy.c', - path / 'bench.c', - '-o', path / 'bench', + str(kerncraft_path / 'headers' / 'timing.c'), + str(kerncraft_path / 'headers' / 'dummy.c'), + str(path / 'bench.c'), + '-o', str(path / 'bench'), ] run_compile_step(compile_cmd) diff --git a/pystencils/kerncraft_coupling/templates/benchmark.c b/pystencils/kerncraft_coupling/templates/benchmark.c index ae70ddd6775a45c0709e95d57cef061da2a4b6b0..0539b501ccd554329db48550473f24d1bb555353 100644 --- a/pystencils/kerncraft_coupling/templates/benchmark.c +++ b/pystencils/kerncraft_coupling/templates/benchmark.c @@ -28,11 +28,11 @@ int main(int argc, char **argv) likwid_markerInit(); {%- endif %} - {%- for field_name, dataType, size in fields %} + {%- for field_name, dataType, elements, size, offset in fields %} // Initialization {{field_name}} - double * {{field_name}} = (double *) aligned_malloc(sizeof({{dataType}}) * {{size}}, 64); - for (unsigned long long i = 0; i < {{size}}; ++i) + double * {{field_name}} = (double *) aligned_alloc(64, {{size}}) + {{offset}}; + for (unsigned long long i = 0; i < {{elements}}; ++i) {{field_name}}[i] = 0.23; if(var_false) @@ -80,7 +80,7 @@ int main(int argc, char **argv) {{kernelName}}({{call_argument_list}}); // Dummy calls - {%- for field_name, dataType, size in fields %} + {%- for field_name, dataType, elements, size, offset in fields %} if(var_false) dummy((void*){{field_name}}); {%- endfor %} {%- for constantName, dataType in constants %} @@ -105,4 +105,8 @@ int main(int argc, char **argv) {%- if likwid %} likwid_markerClose(); {%- endif %} + + {%- for field_name, dataType, elements, size, offset in fields %} + free({{field_name}} - {{offset}}); + {%- endfor %} }