diff --git a/pystencils_benchmark/benchmark.py b/pystencils_benchmark/benchmark.py index 34bdf6f8704cf4366f9a1e03ef5e4130ae17a302..df6bd9b6d0c754a76ca6a44c38c1ffc2299d0505 100644 --- a/pystencils_benchmark/benchmark.py +++ b/pystencils_benchmark/benchmark.py @@ -6,7 +6,7 @@ from jinja2 import Environment, PackageLoader, StrictUndefined import numpy as np from pystencils.backends.cbackend import generate_c, get_headers -from pystencils.astnodes import KernelFunction +from pystencils.astnodes import KernelFunction, PragmaBlock from pystencils.enums import Backend from pystencils.typing import get_base_type from pystencils.sympyextensions import prod @@ -22,7 +22,10 @@ _env = Environment(loader=PackageLoader('pystencils_benchmark'), undefined=Stric def generate_benchmark(kernel_asts: Union[KernelFunction, List[KernelFunction]], path: Path = None, *, - compiler: Compiler = Compiler.GCC) -> None: + compiler: Compiler = Compiler.GCC, + timing: bool = True, + likwid: bool = False + ) -> None: if path is None: path = Path('.') else: @@ -47,16 +50,17 @@ def generate_benchmark(kernel_asts: Union[KernelFunction, List[KernelFunction]], f.write(source) with open(src_path / 'main.c', 'w+') as f: - f.write(kernel_main(kernel_asts)) + f.write(kernel_main(kernel_asts, timing=timing, likwid=likwid)) copy_static_files(path) - compiler_toolchain(path, compiler) + compiler_toolchain(path, compiler, likwid) -def compiler_toolchain(path: Path, compiler: Compiler) -> None: +def compiler_toolchain(path: Path, compiler: Compiler, likwid: bool) -> None: name = compiler.name jinja_context = { 'compiler': name, + 'likwid': likwid, } files = ['Makefile', f'{name}.mk'] @@ -85,17 +89,19 @@ def copy_static_files(path: Path) -> None: f.write(template) -def kernel_main(kernels_ast: List[KernelFunction], timing: bool = True): +def kernel_main(kernels_ast: List[KernelFunction], *, + timing: bool = True, likwid: bool = False) -> str: """ Return C code of a benchmark program for the given kernel. Args: kernels_ast: A list of the pystencils AST object as returned by create_kernel for benchmarking timing: add timing output to the code, prints time per iteration to stdout + likwid: add likwid marker to the code Returns: C code as string """ - Kernel = namedtuple('Kernel', ['name', 'constants', 'fields', 'call_parameters', 'call_argument_list']) + Kernel = namedtuple('Kernel', ['name', 'constants', 'fields', 'call_parameters', 'call_argument_list', 'openmp']) kernels = [] includes = set() for kernel in kernels_ast: @@ -104,6 +110,8 @@ def kernel_main(kernels_ast: List[KernelFunction], timing: bool = True): constants = [] fields = [] call_parameters = [] + # TODO: Think about it maybe there is a better way to detect openmp + openmp = isinstance(kernel.body.args[0], PragmaBlock) for p in kernel.get_parameters(): if not p.is_field_parameter: constants.append((p.symbol.name, str(p.symbol.dtype))) @@ -129,15 +137,18 @@ def kernel_main(kernels_ast: List[KernelFunction], timing: bool = True): size = dim1_size * padding_bytes + np.prod(field.shape) * size_data_type assert align % np_dtype.itemsize == 0 - offset = ((dim0_size + padding_elements + ghost_layers) % kernel.instruction_set['width']) * size_data_type + offset = ((dim0_size + padding_elements + ghost_layers) % + kernel.instruction_set['width']) * size_data_type else: size = elements * size_data_type offset = 0 align = 0 fields.append((p.field_name, dtype, elements, size, offset, align)) call_parameters.append(p.field_name) + + # TODO: Think about openmp detection again kernels.append(Kernel(name=name, fields=fields, constants=constants, call_parameters=call_parameters, - call_argument_list=",".join(call_parameters))) + call_argument_list=",".join(call_parameters), openmp=openmp)) includes.add(name) @@ -145,6 +156,7 @@ def kernel_main(kernels_ast: List[KernelFunction], timing: bool = True): 'kernels': kernels, 'includes': includes, 'timing': timing, + 'likwid': likwid, } main = _env.get_template('main.c').render(**jinja_context) diff --git a/pystencils_benchmark/templates/Clang.mk b/pystencils_benchmark/templates/Clang.mk index 96296cc6d0aff6545ee8457aa601c86265b868a4..15bfe63d81ddcb58dd44b054dc8e477c435fb12c 100644 --- a/pystencils_benchmark/templates/Clang.mk +++ b/pystencils_benchmark/templates/Clang.mk @@ -12,7 +12,7 @@ CFLAGS = -O3 -Wno-format -Wall -Werror $(ANSI_CFLAGS) -fopenmp -march=native # Maybe too much warnings #CFLAGS += -Wcast-qual -Wswitch-default -Wconversion -Wunreachable-code # Specific C flags -CFLAGS := $(CFLAGS) -Wstrict-prototypes +CFLAGS := $(CFLAGS) -Wstrict-prototypes -Wno-error=strict-prototypes LFLAGS = -fopenmp=libomp DEFINES = -D_GNU_SOURCE -DNDEBUG INCLUDES = diff --git a/pystencils_benchmark/templates/GCC.mk b/pystencils_benchmark/templates/GCC.mk index a65ec464c663bfe23a90e5756203ede9eeb4d8de..7dccdfc1f6b35e8630e21966b3a35aaf35fa8841 100644 --- a/pystencils_benchmark/templates/GCC.mk +++ b/pystencils_benchmark/templates/GCC.mk @@ -12,7 +12,7 @@ CFLAGS = -O3 -Wno-format -Wall -Werror $(ANSI_CFLAGS) -fopenmp -march=native # Maybe too much warnings #CFLAGS += -Wcast-qual -Wswitch-default -Wconversion -Wunreachable-code # Specific C flags -CFLAGS := $(CFLAGS) -Wstrict-prototypes +CFLAGS := $(CFLAGS) -Wstrict-prototypes -Wno-error=strict-prototypes LFLAGS = -fopenmp DEFINES = -D_GNU_SOURCE -DNDEBUG INCLUDES = diff --git a/pystencils_benchmark/templates/Makefile b/pystencils_benchmark/templates/Makefile index 98fcaaa19d9a753fd346da9480fd4935d112f2e5..66b68b8d009137ffe0a3069d0f9e5e5a5f65d550 100644 --- a/pystencils_benchmark/templates/Makefile +++ b/pystencils_benchmark/templates/Makefile @@ -7,10 +7,25 @@ SRC_DIR = ./src MAKE_DIR = ./ Q ?= @ +{% if likwid %} +# LIKWID DEFINES +LIKWID_DEFINES := -DLIKWID_PERFMON +LIKWID_PATH = $(shell dirname $(shell which likwid-perfctr)) +LIKWID_LIB := -L$(LIKWID_PATH)/../lib/ +LIKWID_INC := -I$(LIKWID_PATH)/../include/ +{% endif %} + #DO NOT EDIT BELOW include $(MAKE_DIR)/$(TAG).mk INCLUDES += -I./include +{% if likwid %} +INCLUDES += $(LIKWID_INC) +DEFINES += $(LIKWID_DEFINES) +LFLAGS += $(LIKWID_LIB) +LIBS += -llikwid +{% endif %} + VPATH = $(SRC_DIR) ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c)) OBJ = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c)) diff --git a/pystencils_benchmark/templates/main.c b/pystencils_benchmark/templates/main.c index a4c462d598ba4c88ad0f63c0afb66d1962c07b7f..cec0f0eb6c50c360879d72b7fc817d78126adb9f 100644 --- a/pystencils_benchmark/templates/main.c +++ b/pystencils_benchmark/templates/main.c @@ -6,6 +6,9 @@ #include <stdlib.h> #include "timing.h" +{% if likwid %} +#include <likwid-marker.h> +{% endif %} //kernels {% for include in includes %} @@ -22,6 +25,10 @@ int main(int argc, char **argv) return -1; } int n_repeat = atoi(argv[1]); + {% if likwid %} + LIKWID_MARKER_INIT; + {%- endif %} + {% for kernel in kernels %} { // Kernel: {{kernel.name}} @@ -32,6 +39,9 @@ int main(int argc, char **argv) {% else %} {{dataType}}* {{field_name}} = ({{dataType}} *) malloc({{size}}); {% endif %} + {% if kernel.openmp %} + #pragma omp parallel for schedule(static) + {% endif %} for (unsigned long long i = 0; i < {{elements}}; ++i) {{field_name}}[i] = 0.23; {% endfor %} @@ -42,10 +52,31 @@ int main(int argc, char **argv) {{constantName}} = 0.23; {% endfor %} + {% if likwid %} + {% if kernel.openmp %} + #pragma omp parallel + { + {% endif %} + LIKWID_MARKER_REGISTER("{{kernel.name}}"); + {% if kernel.openmp %} + } + {% endif %} + {% endif %} + for(int warmup = 1; warmup >= 0; --warmup) { int repeat = 2; if(warmup == 0) { repeat = n_repeat; + {% if likwid %} + {% if kernel.openmp %} + #pragma omp parallel + { + {% endif %} + LIKWID_MARKER_START("{{kernel.name}}"); + {% if kernel.openmp %} + } + {% endif %} + {% endif %} } {% if timing %} @@ -65,9 +96,25 @@ int main(int argc, char **argv) printf("%s\t%e\n", "{{kernel.name}}",(wcEndTime - wcStartTime) / n_repeat ); {% endif %} } + + {% if likwid %} + {% if kernel.openmp %} + #pragma omp parallel + { + {% endif %} + LIKWID_MARKER_STOP("{{kernel.name}}"); + {% if kernel.openmp %} + } + {% endif %} + {% endif %} + {% for field_name, dataType, elements, size, offset, alignment in kernel.fields %} free({{field_name}}); {% endfor %} } {% endfor %} + + {% if likwid %} + LIKWID_MARKER_CLOSE; + {% endif %} }