Skip to content
Snippets Groups Projects
Commit 95e8da6e authored by Markus Holzer
Browse files

Merge branch 'likwid' into 'master'

Adding likwid markers

See merge request hoenig/pystencils-benchmark!2
parents 7a44d4fa 06d17e90
No related branches found
No related tags found
1 merge request: !2 Adding likwid markers
Pipeline #54771 passed with warnings
...@@ -6,7 +6,7 @@ from jinja2 import Environment, PackageLoader, StrictUndefined ...@@ -6,7 +6,7 @@ from jinja2 import Environment, PackageLoader, StrictUndefined
import numpy as np import numpy as np
from pystencils.backends.cbackend import generate_c, get_headers from pystencils.backends.cbackend import generate_c, get_headers
from pystencils.astnodes import KernelFunction from pystencils.astnodes import KernelFunction, PragmaBlock
from pystencils.enums import Backend from pystencils.enums import Backend
from pystencils.typing import get_base_type from pystencils.typing import get_base_type
from pystencils.sympyextensions import prod from pystencils.sympyextensions import prod
...@@ -22,7 +22,10 @@ _env = Environment(loader=PackageLoader('pystencils_benchmark'), undefined=Stric ...@@ -22,7 +22,10 @@ _env = Environment(loader=PackageLoader('pystencils_benchmark'), undefined=Stric
def generate_benchmark(kernel_asts: Union[KernelFunction, List[KernelFunction]], def generate_benchmark(kernel_asts: Union[KernelFunction, List[KernelFunction]],
path: Path = None, path: Path = None,
*, *,
compiler: Compiler = Compiler.GCC) -> None: compiler: Compiler = Compiler.GCC,
timing: bool = True,
likwid: bool = False
) -> None:
if path is None: if path is None:
path = Path('.') path = Path('.')
else: else:
...@@ -47,16 +50,17 @@ def generate_benchmark(kernel_asts: Union[KernelFunction, List[KernelFunction]], ...@@ -47,16 +50,17 @@ def generate_benchmark(kernel_asts: Union[KernelFunction, List[KernelFunction]],
f.write(source) f.write(source)
with open(src_path / 'main.c', 'w+') as f: with open(src_path / 'main.c', 'w+') as f:
f.write(kernel_main(kernel_asts)) f.write(kernel_main(kernel_asts, timing=timing, likwid=likwid))
copy_static_files(path) copy_static_files(path)
compiler_toolchain(path, compiler) compiler_toolchain(path, compiler, likwid)
def compiler_toolchain(path: Path, compiler: Compiler) -> None: def compiler_toolchain(path: Path, compiler: Compiler, likwid: bool) -> None:
name = compiler.name name = compiler.name
jinja_context = { jinja_context = {
'compiler': name, 'compiler': name,
'likwid': likwid,
} }
files = ['Makefile', f'{name}.mk'] files = ['Makefile', f'{name}.mk']
...@@ -85,17 +89,19 @@ def copy_static_files(path: Path) -> None: ...@@ -85,17 +89,19 @@ def copy_static_files(path: Path) -> None:
f.write(template) f.write(template)
def kernel_main(kernels_ast: List[KernelFunction], timing: bool = True): def kernel_main(kernels_ast: List[KernelFunction], *,
timing: bool = True, likwid: bool = False) -> str:
""" """
Return C code of a benchmark program for the given kernel. Return C code of a benchmark program for the given kernel.
Args: Args:
kernels_ast: A list of the pystencils AST object as returned by create_kernel for benchmarking kernels_ast: A list of the pystencils AST object as returned by create_kernel for benchmarking
timing: add timing output to the code, prints time per iteration to stdout timing: add timing output to the code, prints time per iteration to stdout
likwid: add likwid marker to the code
Returns: Returns:
C code as string C code as string
""" """
Kernel = namedtuple('Kernel', ['name', 'constants', 'fields', 'call_parameters', 'call_argument_list']) Kernel = namedtuple('Kernel', ['name', 'constants', 'fields', 'call_parameters', 'call_argument_list', 'openmp'])
kernels = [] kernels = []
includes = set() includes = set()
for kernel in kernels_ast: for kernel in kernels_ast:
...@@ -104,6 +110,8 @@ def kernel_main(kernels_ast: List[KernelFunction], timing: bool = True): ...@@ -104,6 +110,8 @@ def kernel_main(kernels_ast: List[KernelFunction], timing: bool = True):
constants = [] constants = []
fields = [] fields = []
call_parameters = [] call_parameters = []
# TODO: Think about it maybe there is a better way to detect openmp
openmp = isinstance(kernel.body.args[0], PragmaBlock)
for p in kernel.get_parameters(): for p in kernel.get_parameters():
if not p.is_field_parameter: if not p.is_field_parameter:
constants.append((p.symbol.name, str(p.symbol.dtype))) constants.append((p.symbol.name, str(p.symbol.dtype)))
...@@ -129,15 +137,18 @@ def kernel_main(kernels_ast: List[KernelFunction], timing: bool = True): ...@@ -129,15 +137,18 @@ def kernel_main(kernels_ast: List[KernelFunction], timing: bool = True):
size = dim1_size * padding_bytes + np.prod(field.shape) * size_data_type size = dim1_size * padding_bytes + np.prod(field.shape) * size_data_type
assert align % np_dtype.itemsize == 0 assert align % np_dtype.itemsize == 0
offset = ((dim0_size + padding_elements + ghost_layers) % kernel.instruction_set['width']) * size_data_type offset = ((dim0_size + padding_elements + ghost_layers) %
kernel.instruction_set['width']) * size_data_type
else: else:
size = elements * size_data_type size = elements * size_data_type
offset = 0 offset = 0
align = 0 align = 0
fields.append((p.field_name, dtype, elements, size, offset, align)) fields.append((p.field_name, dtype, elements, size, offset, align))
call_parameters.append(p.field_name) call_parameters.append(p.field_name)
# TODO: Think about openmp detection again
kernels.append(Kernel(name=name, fields=fields, constants=constants, call_parameters=call_parameters, kernels.append(Kernel(name=name, fields=fields, constants=constants, call_parameters=call_parameters,
call_argument_list=",".join(call_parameters))) call_argument_list=",".join(call_parameters), openmp=openmp))
includes.add(name) includes.add(name)
...@@ -145,6 +156,7 @@ def kernel_main(kernels_ast: List[KernelFunction], timing: bool = True): ...@@ -145,6 +156,7 @@ def kernel_main(kernels_ast: List[KernelFunction], timing: bool = True):
'kernels': kernels, 'kernels': kernels,
'includes': includes, 'includes': includes,
'timing': timing, 'timing': timing,
'likwid': likwid,
} }
main = _env.get_template('main.c').render(**jinja_context) main = _env.get_template('main.c').render(**jinja_context)
......
...@@ -12,7 +12,7 @@ CFLAGS = -O3 -Wno-format -Wall -Werror $(ANSI_CFLAGS) -fopenmp -march=native ...@@ -12,7 +12,7 @@ CFLAGS = -O3 -Wno-format -Wall -Werror $(ANSI_CFLAGS) -fopenmp -march=native
# Maybe too much warnings # Maybe too much warnings
#CFLAGS += -Wcast-qual -Wswitch-default -Wconversion -Wunreachable-code #CFLAGS += -Wcast-qual -Wswitch-default -Wconversion -Wunreachable-code
# Specific C flags # Specific C flags
CFLAGS := $(CFLAGS) -Wstrict-prototypes CFLAGS := $(CFLAGS) -Wstrict-prototypes -Wno-error=strict-prototypes
LFLAGS = -fopenmp=libomp LFLAGS = -fopenmp=libomp
DEFINES = -D_GNU_SOURCE -DNDEBUG DEFINES = -D_GNU_SOURCE -DNDEBUG
INCLUDES = INCLUDES =
......
...@@ -12,7 +12,7 @@ CFLAGS = -O3 -Wno-format -Wall -Werror $(ANSI_CFLAGS) -fopenmp -march=native ...@@ -12,7 +12,7 @@ CFLAGS = -O3 -Wno-format -Wall -Werror $(ANSI_CFLAGS) -fopenmp -march=native
# Maybe too much warnings # Maybe too much warnings
#CFLAGS += -Wcast-qual -Wswitch-default -Wconversion -Wunreachable-code #CFLAGS += -Wcast-qual -Wswitch-default -Wconversion -Wunreachable-code
# Specific C flags # Specific C flags
CFLAGS := $(CFLAGS) -Wstrict-prototypes CFLAGS := $(CFLAGS) -Wstrict-prototypes -Wno-error=strict-prototypes
LFLAGS = -fopenmp LFLAGS = -fopenmp
DEFINES = -D_GNU_SOURCE -DNDEBUG DEFINES = -D_GNU_SOURCE -DNDEBUG
INCLUDES = INCLUDES =
......
...@@ -7,10 +7,25 @@ SRC_DIR = ./src ...@@ -7,10 +7,25 @@ SRC_DIR = ./src
MAKE_DIR = ./ MAKE_DIR = ./
Q ?= @ Q ?= @
{% if likwid %}
# LIKWID DEFINES
LIKWID_DEFINES := -DLIKWID_PERFMON
LIKWID_PATH = $(shell dirname $(shell which likwid-perfctr))
LIKWID_LIB := -L$(LIKWID_PATH)/../lib/
LIKWID_INC := -I$(LIKWID_PATH)/../include/
{% endif %}
#DO NOT EDIT BELOW #DO NOT EDIT BELOW
include $(MAKE_DIR)/$(TAG).mk include $(MAKE_DIR)/$(TAG).mk
INCLUDES += -I./include INCLUDES += -I./include
{% if likwid %}
INCLUDES += $(LIKWID_INC)
DEFINES += $(LIKWID_DEFINES)
LFLAGS += $(LIKWID_LIB)
LIBS += -llikwid
{% endif %}
VPATH = $(SRC_DIR) VPATH = $(SRC_DIR)
ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c)) ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c))
OBJ = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c)) OBJ = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c))
......
...@@ -6,6 +6,9 @@ ...@@ -6,6 +6,9 @@
#include <stdlib.h> #include <stdlib.h>
#include "timing.h" #include "timing.h"
{% if likwid %}
#include <likwid-marker.h>
{% endif %}
//kernels //kernels
{% for include in includes %} {% for include in includes %}
...@@ -22,6 +25,10 @@ int main(int argc, char **argv) ...@@ -22,6 +25,10 @@ int main(int argc, char **argv)
return -1; return -1;
} }
int n_repeat = atoi(argv[1]); int n_repeat = atoi(argv[1]);
{% if likwid %}
LIKWID_MARKER_INIT;
{%- endif %}
{% for kernel in kernels %} {% for kernel in kernels %}
{ // Kernel: {{kernel.name}} { // Kernel: {{kernel.name}}
...@@ -32,6 +39,9 @@ int main(int argc, char **argv) ...@@ -32,6 +39,9 @@ int main(int argc, char **argv)
{% else %} {% else %}
{{dataType}}* {{field_name}} = ({{dataType}} *) malloc({{size}}); {{dataType}}* {{field_name}} = ({{dataType}} *) malloc({{size}});
{% endif %} {% endif %}
{% if kernel.openmp %}
#pragma omp parallel for schedule(static)
{% endif %}
for (unsigned long long i = 0; i < {{elements}}; ++i) for (unsigned long long i = 0; i < {{elements}}; ++i)
{{field_name}}[i] = 0.23; {{field_name}}[i] = 0.23;
{% endfor %} {% endfor %}
...@@ -42,10 +52,31 @@ int main(int argc, char **argv) ...@@ -42,10 +52,31 @@ int main(int argc, char **argv)
{{constantName}} = 0.23; {{constantName}} = 0.23;
{% endfor %} {% endfor %}
{% if likwid %}
{% if kernel.openmp %}
#pragma omp parallel
{
{% endif %}
LIKWID_MARKER_REGISTER("{{kernel.name}}");
{% if kernel.openmp %}
}
{% endif %}
{% endif %}
for(int warmup = 1; warmup >= 0; --warmup) { for(int warmup = 1; warmup >= 0; --warmup) {
int repeat = 2; int repeat = 2;
if(warmup == 0) { if(warmup == 0) {
repeat = n_repeat; repeat = n_repeat;
{% if likwid %}
{% if kernel.openmp %}
#pragma omp parallel
{
{% endif %}
LIKWID_MARKER_START("{{kernel.name}}");
{% if kernel.openmp %}
}
{% endif %}
{% endif %}
} }
{% if timing %} {% if timing %}
...@@ -65,9 +96,25 @@ int main(int argc, char **argv) ...@@ -65,9 +96,25 @@ int main(int argc, char **argv)
printf("%s\t%e\n", "{{kernel.name}}",(wcEndTime - wcStartTime) / n_repeat ); printf("%s\t%e\n", "{{kernel.name}}",(wcEndTime - wcStartTime) / n_repeat );
{% endif %} {% endif %}
} }
{% if likwid %}
{% if kernel.openmp %}
#pragma omp parallel
{
{% endif %}
LIKWID_MARKER_STOP("{{kernel.name}}");
{% if kernel.openmp %}
}
{% endif %}
{% endif %}
{% for field_name, dataType, elements, size, offset, alignment in kernel.fields %} {% for field_name, dataType, elements, size, offset, alignment in kernel.fields %}
free({{field_name}}); free({{field_name}});
{% endfor %} {% endfor %}
} }
{% endfor %} {% endfor %}
{% if likwid %}
LIKWID_MARKER_CLOSE;
{% endif %}
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment