diff --git a/pystencils_benchmark/benchmark.py b/pystencils_benchmark/benchmark.py index 28b52493a60e44d58e6548938b80d219485dbbfe..41d7410f50c77bb0c838238ff4f5e0c10a85af3c 100644 --- a/pystencils_benchmark/benchmark.py +++ b/pystencils_benchmark/benchmark.py @@ -6,7 +6,7 @@ from jinja2 import Environment, PackageLoader, StrictUndefined import numpy as np from pystencils.backends.cbackend import generate_c, get_headers -from pystencils.astnodes import KernelFunction +from pystencils.astnodes import KernelFunction, PragmaBlock from pystencils.enums import Backend from pystencils.typing import get_base_type from pystencils.sympyextensions import prod @@ -98,7 +98,7 @@ def kernel_main(kernels_ast: List[KernelFunction], *, Returns: C code as string """ - Kernel = namedtuple('Kernel', ['name', 'constants', 'fields', 'call_parameters', 'call_argument_list']) + Kernel = namedtuple('Kernel', ['name', 'constants', 'fields', 'call_parameters', 'call_argument_list', 'openmp']) kernels = [] includes = set() for kernel in kernels_ast: @@ -107,6 +107,8 @@ def kernel_main(kernels_ast: List[KernelFunction], *, constants = [] fields = [] call_parameters = [] + # TODO: Think about it maybe there is a better way to detect openmp + openmp = isinstance(kernel.body.args[0], PragmaBlock) for p in kernel.get_parameters(): if not p.is_field_parameter: constants.append((p.symbol.name, str(p.symbol.dtype))) @@ -139,8 +141,10 @@ def kernel_main(kernels_ast: List[KernelFunction], *, align = 0 fields.append((p.field_name, dtype, elements, size, offset, align)) call_parameters.append(p.field_name) + + # TODO: Think about openmp detection again kernels.append(Kernel(name=name, fields=fields, constants=constants, call_parameters=call_parameters, - call_argument_list=",".join(call_parameters))) + call_argument_list=",".join(call_parameters), openmp=openmp)) includes.add(name) diff --git a/pystencils_benchmark/templates/main.c b/pystencils_benchmark/templates/main.c index 26fa4a623d0da65d87fd4c6c250261c37b69e400..cec0f0eb6c50c360879d72b7fc817d78126adb9f 100644 --- a/pystencils_benchmark/templates/main.c +++ b/pystencils_benchmark/templates/main.c @@ -39,6 +39,9 @@ int main(int argc, char **argv) {% else %} {{dataType}}* {{field_name}} = ({{dataType}} *) malloc({{size}}); {% endif %} + {% if kernel.openmp %} + #pragma omp parallel for schedule(static) + {% endif %} for (unsigned long long i = 0; i < {{elements}}; ++i) {{field_name}}[i] = 0.23; {% endfor %} @@ -50,7 +53,14 @@ int main(int argc, char **argv) {% endfor %} {% if likwid %} + {% if kernel.openmp %} + #pragma omp parallel + { + {% endif %} LIKWID_MARKER_REGISTER("{{kernel.name}}"); + {% if kernel.openmp %} + } + {% endif %} {% endif %} for(int warmup = 1; warmup >= 0; --warmup) { @@ -58,7 +68,14 @@ int main(int argc, char **argv) if(warmup == 0) { repeat = n_repeat; {% if likwid %} + {% if kernel.openmp %} + #pragma omp parallel + { + {% endif %} LIKWID_MARKER_START("{{kernel.name}}"); + {% if kernel.openmp %} + } + {% endif %} {% endif %} } @@ -80,13 +97,20 @@ int main(int argc, char **argv) {% endif %} } - {% for field_name, dataType, elements, size, offset, alignment in kernel.fields %} - free({{field_name}}); - {% endfor %} - {% if likwid %} + {% if kernel.openmp %} + #pragma omp parallel + { + {% endif %} LIKWID_MARKER_STOP("{{kernel.name}}"); + {% if kernel.openmp %} + } + {% endif %} {% endif %} + + {% for field_name, dataType, elements, size, offset, alignment in kernel.fields %} + free({{field_name}}); + {% endfor %} } {% endfor %}