Skip to content
Snippets Groups Projects
Commit 4141f192 authored by Markus Holzer's avatar Markus Holzer
Browse files

Remove benchmark generation bug fix from this MR

parent 7c437d92
No related branches found
No related tags found
1 merge request: !210 "WIP: Assembly"
...@@ -10,11 +10,8 @@ from pystencils.backends.cbackend import generate_c, get_headers ...@@ -10,11 +10,8 @@ from pystencils.backends.cbackend import generate_c, get_headers
from pystencils.cpu.cpujit import get_compiler_config, run_compile_step from pystencils.cpu.cpujit import get_compiler_config, run_compile_step
from pystencils.data_types import get_base_type from pystencils.data_types import get_base_type
from pystencils.include import get_pystencils_include_path from pystencils.include import get_pystencils_include_path
from pystencils.integer_functions import modulo_ceil
from pystencils.sympyextensions import prod from pystencils.sympyextensions import prod
import numpy as np
def generate_benchmark(ast, likwid=False, openmp=False, timing=False): def generate_benchmark(ast, likwid=False, openmp=False, timing=False):
"""Return C code of a benchmark program for the given kernel. """Return C code of a benchmark program for the given kernel.
...@@ -40,24 +37,7 @@ def generate_benchmark(ast, likwid=False, openmp=False, timing=False): ...@@ -40,24 +37,7 @@ def generate_benchmark(ast, likwid=False, openmp=False, timing=False):
assert p.is_field_pointer, "Benchmark implemented only for kernels with fixed loop size" assert p.is_field_pointer, "Benchmark implemented only for kernels with fixed loop size"
field = accessed_fields[p.field_name] field = accessed_fields[p.field_name]
dtype = str(get_base_type(p.symbol.dtype)) dtype = str(get_base_type(p.symbol.dtype))
np_dtype = np.dtype(dtype) fields.append((p.field_name, dtype, prod(field.shape)))
dim0_size = field.shape[-1]
dim1_size = np.prod(field.shape[:-1])
size_data_type = np_dtype.itemsize
elements = prod(field.shape)
align = ast.instruction_set['width'] * size_data_type
padding_elements = modulo_ceil(dim0_size, ast.instruction_set['width']) - dim0_size
padding_bytes = padding_elements * size_data_type
ghost_layers = max(max(ast.ghost_layers))
size = dim1_size * padding_bytes + np.prod(field.shape) * size_data_type
assert align % np_dtype.itemsize == 0
offset = ((dim0_size + padding_elements + ghost_layers) % ast.instruction_set['width']) * size_data_type
fields.append((p.field_name, dtype, elements, size, offset, align))
call_parameters.append(p.field_name) call_parameters.append(p.field_name)
header_list = get_headers(ast) header_list = get_headers(ast)
...@@ -119,10 +99,10 @@ def run_c_benchmark(ast, inner_iterations, outer_iterations=3, path=None): ...@@ -119,10 +99,10 @@ def run_c_benchmark(ast, inner_iterations, outer_iterations=3, path=None):
compiler_config = get_compiler_config() compiler_config = get_compiler_config()
compile_cmd = [compiler_config['command']] + compiler_config['flags'].split() compile_cmd = [compiler_config['command']] + compiler_config['flags'].split()
compile_cmd += [*extra_flags, compile_cmd += [*extra_flags,
str(kerncraft_path / 'headers' / 'timing.c'), kerncraft_path / 'headers' / 'timing.c',
str(kerncraft_path / 'headers' / 'dummy.c'), kerncraft_path / 'headers' / 'dummy.c',
str(path / 'bench.c'), path / 'bench.c',
'-o', str(path / 'bench'), '-o', path / 'bench',
] ]
run_compile_step(compile_cmd) run_compile_step(compile_cmd)
......
...@@ -5,7 +5,6 @@ ...@@ -5,7 +5,6 @@
#include <stdbool.h> #include <stdbool.h>
#include <math.h> #include <math.h>
#include <stdio.h> #include <stdio.h>
#include <assert.h>
{{ includes }} {{ includes }}
...@@ -19,43 +18,6 @@ void dummy(void *); ...@@ -19,43 +18,6 @@ void dummy(void *);
void timing(double* wcTime, double* cpuTime); void timing(double* wcTime, double* cpuTime);
extern int var_false; extern int var_false;
/**
 * Allocates `size` usable bytes such that (returned address + offset) is a
 * multiple of `alignment`.
 * See waLBerla src/field/allocation/AlignedMalloc.
 *
 * The pointer returned by malloc is stored in the sizeof(void*) bytes directly
 * in front of the returned address so that aligned_free can recover it.
 * Memory obtained here must therefore be released with aligned_free, not free.
 *
 * @param size       number of usable bytes requested
 * @param alignment  required alignment in bytes; must be a power of two and > 0
 * @param offset     byte offset at which the alignment has to hold; must be < alignment
 * @return pointer to the usable memory, or nullptr if the underlying malloc failed
 */
void *aligned_malloc_with_offset( uint64_t size, uint64_t alignment, uint64_t offset )
{
   // With 0 alignment this function makes no sense
   // use normal malloc instead
   assert( alignment > 0 );
   // Tests if alignment is power of two (assuming alignment>0)
   assert( !(alignment & (alignment - 1)) );
   assert( offset < alignment );

   // Over-allocate: room for the bookkeeping pointer, for rounding up to the
   // next aligned address, and for shifting by (alignment - offset).
   void *pa = std::malloc( (size+2*alignment-1 )+sizeof(void *)); // pointer to allocated memory
   if(!pa)
      return nullptr;

   // Find next aligned position, starting at pa+sizeof(void*)-1
   void *ptr = (void*)( ((size_t)pa+sizeof(void *)+alignment-1) & ~(alignment-1)); // pointer to usable aligned memory
   // Shift so that ptr+offset (rather than ptr itself) is aligned
   ptr = (void*) ( (char*)(ptr) + alignment - offset);

   // Store pointer to real allocated chunk just before usable chunk.
   // ptr is only pointer-aligned if offset is a multiple of sizeof(void*), so
   // the value is copied byte by byte instead of being written through a
   // (possibly misaligned) void** lvalue.
   const unsigned char *src = (const unsigned char*)&pa;
   unsigned char *dst = (unsigned char*)ptr - sizeof(void *);
   for( size_t i = 0; i < sizeof(void *); ++i )
      dst[i] = src[i];

   assert( ((size_t)ptr+offset) % alignment == 0 );

   return ptr;
}
/**
 * Releases memory whose real allocation pointer is stored in the
 * sizeof(void*) bytes directly in front of `ptr`.
 *
 * @param ptr  pointer handed out to the user, or a null pointer (ignored)
 */
void aligned_free( void *ptr )
{
   if(!ptr)
      return;

   // assume that pointer to real allocated chunk is stored just before
   // chunk that was given to user.
   // ptr itself need not be pointer-aligned, so the stored value is read
   // byte by byte instead of through a (possibly misaligned) void** lvalue.
   void *pa;
   unsigned char *dst = (unsigned char*)&pa;
   const unsigned char *src = (const unsigned char*)ptr - sizeof(void *);
   for( size_t i = 0; i < sizeof(void *); ++i )
      dst[i] = src[i];

   std::free(pa);
}
{{kernel_code}} {{kernel_code}}
...@@ -66,11 +28,11 @@ int main(int argc, char **argv) ...@@ -66,11 +28,11 @@ int main(int argc, char **argv)
likwid_markerInit(); likwid_markerInit();
{%- endif %} {%- endif %}
{%- for field_name, dataType, elements, size, offset, alignment in fields %} {%- for field_name, dataType, size in fields %}
// Initialization {{field_name}} // Initialization {{field_name}}
double * {{field_name}} = (double *) aligned_malloc_with_offset({{size}}, {{alignment}}, {{offset}}); double * {{field_name}} = (double *) aligned_malloc(sizeof({{dataType}}) * {{size}}, 64);
for (unsigned long long i = 0; i < {{elements}}; ++i) for (unsigned long long i = 0; i < {{size}}; ++i)
{{field_name}}[i] = 0.23; {{field_name}}[i] = 0.23;
if(var_false) if(var_false)
...@@ -107,18 +69,18 @@ int main(int argc, char **argv) ...@@ -107,18 +69,18 @@ int main(int argc, char **argv)
likwid_markerStartRegion("loop"); likwid_markerStartRegion("loop");
{%- endif %} {%- endif %}
} }
{%- if timing %} {%- if timing %}
double wcStartTime, cpuStartTime, wcEndTime, cpuEndTime; double wcStartTime, cpuStartTime, wcEndTime, cpuEndTime;
timing(&wcStartTime, &cpuStartTime); timing(&wcStartTime, &cpuStartTime);
{%- endif %} {%- endif %}
for (; repeat > 0; --repeat) for (; repeat > 0; --repeat)
{ {
{{kernelName}}({{call_argument_list}}); {{kernelName}}({{call_argument_list}});
// Dummy calls // Dummy calls
{%- for field_name, dataType, elements, size, offset, alignment in fields %} {%- for field_name, dataType, size in fields %}
if(var_false) dummy((void*){{field_name}}); if(var_false) dummy((void*){{field_name}});
{%- endfor %} {%- endfor %}
{%- for constantName, dataType in constants %} {%- for constantName, dataType in constants %}
...@@ -143,8 +105,4 @@ int main(int argc, char **argv) ...@@ -143,8 +105,4 @@ int main(int argc, char **argv)
{%- if likwid %} {%- if likwid %}
likwid_markerClose(); likwid_markerClose();
{%- endif %} {%- endif %}
{%- for field_name, dataType, elements, size, offset, alignment in fields %}
aligned_free({{field_name}});
{%- endfor %}
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment