Skip to content
Snippets Groups Projects
Commit 3c976921 authored by Jan Hönig's avatar Jan Hönig
Browse files

All todos done so far

parent c4141a13
Branches
No related tags found
No related merge requests found
from .enums import Compiler
from .benchmark import generate_benchmark, kernel_header, kernel_source from .benchmark import generate_benchmark, kernel_header, kernel_source
from typing import Union, List
from collections import namedtuple
from pathlib import Path from pathlib import Path
from jinja2 import Environment, PackageLoader, StrictUndefined from jinja2 import Environment, PackageLoader, StrictUndefined
import numpy as np
from pystencils.backends.cbackend import generate_c, get_headers from pystencils.backends.cbackend import generate_c, get_headers
from pystencils.astnodes import KernelFunction, PragmaBlock from pystencils.astnodes import KernelFunction, PragmaBlock
from pystencils.enums import Backend from pystencils.enums import Backend
...@@ -8,13 +12,13 @@ from pystencils.data_types import get_base_type ...@@ -8,13 +12,13 @@ from pystencils.data_types import get_base_type
from pystencils.sympyextensions import prod from pystencils.sympyextensions import prod
from pystencils.integer_functions import modulo_ceil from pystencils.integer_functions import modulo_ceil
import numpy as np from pystencils_benchmark.enums import Compiler
def generate_benchmark(kernel_ast: KernelFunction, def generate_benchmark(kernel_asts: Union[KernelFunction, List[KernelFunction]],
path: Path = None, path: Path = None,
*, *,
dialect: Backend = Backend.C) -> None: compiler: Compiler = Compiler.GCC) -> None:
if path is None: if path is None:
path = Path('.') path = Path('.')
else: else:
...@@ -24,20 +28,39 @@ def generate_benchmark(kernel_ast: KernelFunction, ...@@ -24,20 +28,39 @@ def generate_benchmark(kernel_ast: KernelFunction,
include_path = path / 'include' include_path = path / 'include'
include_path.mkdir(parents=True, exist_ok=True) include_path.mkdir(parents=True, exist_ok=True)
kernel_name = kernel_ast.function_name if isinstance(kernel_asts, KernelFunction):
kernel_asts = [kernel_asts]
for kernel_ast in kernel_asts:
kernel_name = kernel_ast.function_name
header = kernel_header(kernel_ast, dialect) header = kernel_header(kernel_ast)
with open(include_path / f'{kernel_name}.h', 'w+') as f: with open(include_path / f'{kernel_name}.h', 'w+') as f:
f.write(header) f.write(header)
source = kernel_source(kernel_ast, dialect) source = kernel_source(kernel_ast)
with open(src_path / f'{kernel_name}.c', 'w+') as f: with open(src_path / f'{kernel_name}.c', 'w+') as f:
f.write(source) f.write(source)
with open(src_path / 'main.c', 'w+') as f: with open(src_path / 'main.c', 'w+') as f:
f.write(kernel_main(kernel_ast)) f.write(kernel_main(kernel_asts))
copy_static_files(path) copy_static_files(path)
compiler_toolchain(path, compiler)
def compiler_toolchain(path: Path, compiler: Compiler) -> None:
    """Render the build-system templates for the selected compiler into ``path``.

    Two templates are looked up through the ``pystencils_benchmark`` package
    loader and written side by side: the generic ``Makefile`` and the
    compiler-specific ``<compiler.name>.mk``. Both are rendered with the
    compiler name available to the template as ``compiler``.

    Args:
        path: directory the rendered files are written into.
        compiler: enum member whose ``name`` selects the ``.mk`` template and
            is handed to the templates.
    """
    name = compiler.name
    jinja_context = {
        'compiler': name,
    }

    env = Environment(loader=PackageLoader('pystencils_benchmark'), undefined=StrictUndefined)
    for file_name in ('Makefile', f'{name}.mk'):
        # Render before touching the file system: if the template is missing
        # or uses an undefined variable (StrictUndefined raises), no output
        # file is created or truncated.
        rendered = env.get_template(file_name).render(**jinja_context)
        with open(path / file_name, 'w+') as f:
            f.write(rendered)
def copy_static_files(path: Path) -> None: def copy_static_files(path: Path) -> None:
...@@ -47,8 +70,7 @@ def copy_static_files(path: Path) -> None: ...@@ -47,8 +70,7 @@ def copy_static_files(path: Path) -> None:
include_path.mkdir(parents=True, exist_ok=True) include_path.mkdir(parents=True, exist_ok=True)
env = Environment(loader=PackageLoader('pystencils_benchmark'), undefined=StrictUndefined) env = Environment(loader=PackageLoader('pystencils_benchmark'), undefined=StrictUndefined)
files = ['Makefile', 'aligned_malloc.h', 'timing.h', 'timing.c', 'include_Clang.mk', 'include_GCC.mk', files = ['aligned_malloc.h', 'timing.h', 'timing.c']
'include_ICC.mk', 'include_GCCdebug.mk']
for file_name in files: for file_name in files:
template = env.get_template(file_name).render() template = env.get_template(file_name).render()
if file_name[-1] == 'h': if file_name[-1] == 'h':
...@@ -61,61 +83,64 @@ def copy_static_files(path: Path) -> None: ...@@ -61,61 +83,64 @@ def copy_static_files(path: Path) -> None:
f.write(template) f.write(template)
def kernel_main(kernel: KernelFunction, timing: bool = True): def kernel_main(kernels_ast: List[KernelFunction], timing: bool = True):
""" """
Return C code of a benchmark program for the given kernel. Return C code of a benchmark program for the given kernel.
Args: Args:
kernel: the pystencils AST object as returned by create_kernel kernels_ast: A list of the pystencils AST object as returned by create_kernel for benchmarking
timing: add timing output to the code, prints time per iteration to stdout timing: add timing output to the code, prints time per iteration to stdout
Returns: Returns:
C code as string C code as string
""" """
kernel_name = kernel.function_name Kernel = namedtuple('Kernel', ['name', 'constants', 'fields', 'call_parameters', 'call_argument_list'])
accessed_fields = {f.name: f for f in kernel.fields_accessed} kernels = []
constants = [] includes = set()
fields = [] for kernel in kernels_ast:
call_parameters = [] name = kernel.function_name
for p in kernel.get_parameters(): accessed_fields = {f.name: f for f in kernel.fields_accessed}
if not p.is_field_parameter: constants = []
constants.append((p.symbol.name, str(p.symbol.dtype))) fields = []
call_parameters.append(p.symbol.name) call_parameters = []
else: for p in kernel.get_parameters():
assert p.is_field_pointer, "Benchmark implemented only for kernels with fixed loop size" if not p.is_field_parameter:
field = accessed_fields[p.field_name] constants.append((p.symbol.name, str(p.symbol.dtype)))
dtype = str(get_base_type(p.symbol.dtype)) call_parameters.append(p.symbol.name)
np_dtype = get_base_type(p.symbol.dtype).numpy_dtype
size_data_type = np_dtype.itemsize
dim0_size = field.shape[-1]
dim1_size = np.prod(field.shape[:-1])
elements = prod(field.shape)
if kernel.instruction_set:
align = kernel.instruction_set['width'] * size_data_type
padding_elements = modulo_ceil(dim0_size, kernel.instruction_set['width']) - dim0_size
padding_bytes = padding_elements * size_data_type
ghost_layers = max(max(kernel.ghost_layers))
size = dim1_size * padding_bytes + np.prod(field.shape) * size_data_type
assert align % np_dtype.itemsize == 0
offset = ((dim0_size + padding_elements + ghost_layers) % kernel.instruction_set['width']) * size_data_type
fields.append((p.field_name, dtype, elements, size, offset, align))
call_parameters.append(p.field_name)
else: else:
size = elements * size_data_type assert p.is_field_pointer, "Benchmark implemented only for kernels with fixed loop size"
fields.append((p.field_name, dtype, elements, size, 0, 0)) field = accessed_fields[p.field_name]
call_parameters.append(p.field_name) dtype = str(get_base_type(p.symbol.dtype))
np_dtype = get_base_type(p.symbol.dtype).numpy_dtype
size_data_type = np_dtype.itemsize
dim0_size = field.shape[-1]
dim1_size = np.prod(field.shape[:-1])
elements = prod(field.shape)
if kernel.instruction_set:
align = kernel.instruction_set['width'] * size_data_type
padding_elements = modulo_ceil(dim0_size, kernel.instruction_set['width']) - dim0_size
padding_bytes = padding_elements * size_data_type
ghost_layers = max(max(kernel.ghost_layers))
size = dim1_size * padding_bytes + np.prod(field.shape) * size_data_type
assert align % np_dtype.itemsize == 0
offset = ((dim0_size + padding_elements + ghost_layers) % kernel.instruction_set['width']) * size_data_type
fields.append((p.field_name, dtype, elements, size, offset, align))
call_parameters.append(p.field_name)
else:
size = elements * size_data_type
fields.append((p.field_name, dtype, elements, size, 0, 0))
call_parameters.append(p.field_name)
kernels.append(Kernel(name=name, fields=fields, constants=constants, call_parameters=call_parameters,
call_argument_list=",".join(call_parameters)))
includes.add(f'#include "{name}.h"\n')
includes = f'#include "{kernel_name}.h"\n'
jinja_context = { jinja_context = {
'kernel_code': generate_c(kernel, dialect=Backend.C), 'kernels': kernels,
'kernel_name': kernel_name,
'fields': fields,
'constants': constants,
'call_argument_list': ",".join(call_parameters),
'includes': includes, 'includes': includes,
'timing': timing, 'timing': timing,
} }
......
from enum import Enum, auto
class Compiler(Enum):
    """Compiler configurations a benchmark build can be generated for.

    The member *name* is what matters to the generator: it is passed to the
    Makefile template and selects the ``<name>.mk`` template that is rendered.
    The values are assigned automatically and carry no meaning of their own.
    """

    GCC = auto()
    GCCdebug = auto()
    Clang = auto()
    ICC = auto()
CC = clang CC = clang
CXX = clang++ LINKER = $(CC)
FC = gfortran
LINKER = $(CXX)
ANSI_CFLAGS = -ansi ANSI_CFLAGS = -ansi
ANSI_CFLAGS += -std=c++0x ANSI_CFLAGS += -std=c99
ANSI_CFLAGS += -pedantic ANSI_CFLAGS += -pedantic
ANSI_CFLAGS += -Wextra ANSI_CFLAGS += -Wextra
CFLAGS = -O3 -Wno-format -Wall $(ANSI_CFLAGS) CFLAGS = -O3 -Wno-format -Wall $(ANSI_CFLAGS) -fopenmp
# More warning pls # More warning pls
CFLAGS += -Wfloat-equal -Wundef -Wshadow -Wpointer-arith -Wcast-align -Wstrict-overflow=5 -Wwrite-strings -Waggregate-return #CFLAGS += -Wfloat-equal -Wundef -Wshadow -Wpointer-arith -Wcast-align -Wstrict-overflow=5 -Wwrite-strings -Waggregate-return
# Maybe too much warnings # Maybe too much warnings
CFLAGS += -Wcast-qual -Wswitch-default -Wconversion -Wunreachable-code #CFLAGS += -Wcast-qual -Wswitch-default -Wconversion -Wunreachable-code
CXXFLAGS := $(CFLAGS)
# Specific C flags # Specific C flags
CFLAGS := $(CFLAGS) -Wstrict-prototypes CFLAGS := $(CFLAGS) -Wstrict-prototypes
FCFLAGS = LFLAGS = -fopenmp
CPPFLAGS = -std=c++0x
LFLAGS = #-fopenmp
DEFINES = -D_GNU_SOURCE -DNDEBUG DEFINES = -D_GNU_SOURCE -DNDEBUG
INCLUDES = #-I/usr/lib/gcc/x86_64-linux-gnu/4.8/include/ INCLUDES =
LIBS = #-L/usr/lib/x86_64-linux-gnu/libomp.so.5 LIBS =
...@@ -5,10 +5,8 @@ ANSI_CFLAGS = -ansi ...@@ -5,10 +5,8 @@ ANSI_CFLAGS = -ansi
ANSI_CFLAGS += -std=c99 ANSI_CFLAGS += -std=c99
ANSI_CFLAGS += -pedantic ANSI_CFLAGS += -pedantic
ANSI_CFLAGS += -Wextra ANSI_CFLAGS += -Wextra
ANSI_CFLAGS += -O0
ANSI_CFLAGS += -g
CFLAGS = -Wno-format -Wall $(ANSI_CFLAGS) CFLAGS = -O0 -g -Wno-format -Wall $(ANSI_CFLAGS)
FCFLAGS = FCFLAGS =
LFLAGS = LFLAGS =
DEFINES = -D_GNU_SOURCE DEFINES = -D_GNU_SOURCE
......
TAG = GCC TAG = {{compiler}}
#CONFIGURE BUILD SYSTEM #CONFIGURE BUILD SYSTEM
TARGET = benchmark-$(TAG) TARGET = benchmark-$(TAG)
...@@ -8,13 +8,13 @@ MAKE_DIR = ./ ...@@ -8,13 +8,13 @@ MAKE_DIR = ./
Q ?= @ Q ?= @
#DO NOT EDIT BELOW #DO NOT EDIT BELOW
include $(MAKE_DIR)/include_$(TAG).mk include $(MAKE_DIR)/$(TAG).mk
INCLUDES += -I./include INCLUDES += -I./include
VPATH = $(SRC_DIR) VPATH = $(SRC_DIR)
ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c)) ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c))
OBJ = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c)) OBJ = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c))
CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(INCLUDES) CFLAGS := $(CFLAGS) $(DEFINES) $(INCLUDES)
${TARGET}: $(BUILD_DIR) $(OBJ) ${TARGET}: $(BUILD_DIR) $(OBJ)
...@@ -25,8 +25,8 @@ asm: $(BUILD_DIR) $(ASM) ...@@ -25,8 +25,8 @@ asm: $(BUILD_DIR) $(ASM)
$(BUILD_DIR)/%.o: %.c $(BUILD_DIR)/%.o: %.c
@echo "===> COMPILE $@" @echo "===> COMPILE $@"
$(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@ $(Q)$(CC) -c $(CFLAGS) $< -o $@
$(Q)$(CC) $(CPPFLAGS) -MT $(@:.d=.o) -MM $< > $(BUILD_DIR)/$*.d $(Q)$(CC) $(CFLAGS) -MT $(@:.d=.o) -MM $< > $(BUILD_DIR)/$*.d
$(BUILD_DIR)/%.s: %.c $(BUILD_DIR)/%.s: %.c
@echo "===> GENERATE ASM $@" @echo "===> GENERATE ASM $@"
...@@ -44,16 +44,7 @@ ifeq ($(findstring $(MAKECMDGOALS),clean),) ...@@ -44,16 +44,7 @@ ifeq ($(findstring $(MAKECMDGOALS),clean),)
-include $(OBJ:.o=.d) -include $(OBJ:.o=.d)
endif endif
.PHONY: clean distclean debug debug-clean debug-distclean .PHONY: clean distclean
debug:
@make TAG=GCCdebug
debug-clean:
@make clean TAG=GCCdebug
debug-distclean:
@make distclean TAG=GCCdebug
clean: clean:
@echo "===> CLEAN" @echo "===> CLEAN"
......
...@@ -5,56 +5,66 @@ ...@@ -5,56 +5,66 @@
#include <stdio.h> #include <stdio.h>
#include <assert.h> #include <assert.h>
{{ includes }} #include "timing.h"
{%- for include in includes %}
{{ include }}
{%- endfor %}
#define RESTRICT __restrict__ #define RESTRICT __restrict__
#define FUNC_PREFIX #define FUNC_PREFIX
void timing(double* wcTime, double* cpuTime);
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
{%- for field_name, dataType, elements, size, offset, alignment in fields %} if(argc < 2) {
// Initialization {{field_name}} printf("Usage: %s <n_repeat>\n", argv[0]);
{%- if alignment > 0 %} return -1;
{{dataType}} * {{field_name}} = ({{dataType}} *) aligned_malloc_with_offset({{size}}, {{alignment}}, {{offset}});
{%- else %}
{{dataType}} * {{field_name}} = ({{dataType}} *) malloc({{size}});
{%- endif %}
for (unsigned long long i = 0; i < {{elements}}; ++i)
{{field_name}}[i] = 0.23;
{%- endfor %}
{%- for constantName, dataType in constants %}
// Constant {{constantName}}
{{dataType}} {{constantName}};
{{constantName}} = 0.23;
{%- endfor %}
for(int warmup = 1; warmup >= 0; --warmup) {
int repeat = 2;
if(warmup == 0) {
repeat = atoi(argv[1]);
} }
int n_repeat = atoi(argv[1]);
{%- for kernel in kernels %}
{
{%- for field_name, dataType, elements, size, offset, alignment in kernel.fields %}
// Initialization {{field_name}}
{%- if alignment > 0 %}
{{dataType}} * {{field_name}} = ({{dataType}} *) aligned_malloc_with_offset({{size}}, {{alignment}}, {{offset}});
{%- else %}
{{dataType}} * {{field_name}} = ({{dataType}} *) malloc({{size}});
{%- endif %}
for (unsigned long long i = 0; i < {{elements}}; ++i)
{{field_name}}[i] = 0.23;
{%- endfor %}
{%- if timing %} {%- for constantName, dataType in kernel.constants %}
double wcStartTime, cpuStartTime, wcEndTime, cpuEndTime; // Constant {{constantName}}
timing(&wcStartTime, &cpuStartTime); {{dataType}} {{constantName}};
{%- endif %} {{constantName}} = 0.23;
{%- endfor %}
for (; repeat > 0; --repeat) for(int warmup = 1; warmup >= 0; --warmup) {
{ int repeat = 2;
{{kernel_name}}({{call_argument_list}}); if(warmup == 0) {
} repeat = n_repeat;
{%- if timing %} }
timing(&wcEndTime, &cpuEndTime);
if( warmup == 0) {%- if timing %}
printf("%e\n", (wcEndTime - wcStartTime) / atoi(argv[1]) ); double wcStartTime, cpuStartTime, wcEndTime, cpuEndTime;
{%- endif %} timing(&wcStartTime, &cpuStartTime);
{%- endif %}
} for (; repeat > 0; --repeat)
{
{{kernel.name}}({{kernel.call_argument_list}});
}
{%- if timing %}
timing(&wcEndTime, &cpuEndTime);
if( warmup == 0)
printf("%s\t%e\n", "{{kernel.name}}",(wcEndTime - wcStartTime) / n_repeat );
{%- endif %}
}
{%- for field_name, dataType, elements, size, offset, alignment in fields %} {%- for field_name, dataType, elements, size, offset, alignment in kernel.fields %}
free({{field_name}}); free({{field_name}});
{%- endfor %} {%- endfor %}
}
{%- endfor %}
} }
Jinja2>=3.0.2 Jinja2>=3.0.2
pystencils>=0.4.1 pystencils>=0.4.1
setuptools>=44.0.0 setuptools>=44.0.0
\ No newline at end of file numpy>=1.21.4
\ No newline at end of file
%% Cell type:code id: tags:
``` python
import numpy as np
import pystencils as ps
from pystencils_benchmark import generate_benchmark
from pathlib import Path
```
%% Cell type:code id: tags:
``` python
config = ps.CreateKernelConfig(function_name='vadd')
```
%% Cell type:code id: tags:
``` python
a, b, c = ps.fields(a=np.ones(4000000), b=np.ones(4000000), c=np.ones(4000000))
```
%% Cell type:code id: tags:
``` python
# Element-wise vector addition a = b + c, written as a pystencils kernel.
@ps.kernel
def vadd():
    a[0] @= b[0] + c[0]
```
%% Cell type:code id: tags:
``` python
kernel_vadd = ps.create_kernel(vadd, config=config)
ps.show_code(kernel_vadd)
```
%% Output
%% Cell type:code id: tags:
``` python
example_path = Path.cwd() / 'example'
generate_benchmark(kernel_vadd, example_path)
```
test.py 0 → 100755
#!/usr/bin/env python
# coding: utf-8
import subprocess
import numpy as np
import sympy as sp
import pystencils as ps
from pystencils_benchmark import generate_benchmark
from pathlib import Path
def generate(path: Path):
    """Generate a benchmark project for a vector-add and a daxpy kernel.

    Both kernels operate on the same three fields, each created from a NumPy
    array of 4000000 ones; the resulting kernel ASTs are handed to
    ``generate_benchmark`` together with ``path``.

    Args:
        path: forwarded unchanged to ``generate_benchmark`` as the output
            location.
    """
    a, b, c = ps.fields(a=np.ones(4000000), b=np.ones(4000000), c=np.ones(4000000))
    alpha = sp.symbols('alpha')
    kernel_asts = []

    # NOTE(review): the nested kernel bodies are kept exactly as written;
    # the pystencils decorators presumably process their source, so no
    # docstrings or extra statements are added inside them.
    @ps.kernel_config(ps.CreateKernelConfig())
    def vadd():
        a[0] @= b[0] + c[0]

    kernel_asts.append(ps.create_kernel(**vadd))

    @ps.kernel_config(ps.CreateKernelConfig())
    def daxpy():
        b[0] @= alpha * a[0] + b[0]

    kernel_asts.append(ps.create_kernel(**daxpy))

    generate_benchmark(kernel_asts, path)
def make(path: Path):
    """Build the generated benchmark by running ``make`` inside ``path``.

    Args:
        path: directory containing the generated Makefile.

    Raises:
        subprocess.CalledProcessError: if ``make`` exits with a non-zero status.
    """
    # Run in ``path`` rather than the caller's working directory so the
    # Makefile generated there is the one that is built. ``check=True`` stops
    # the script instead of continuing after a failed build.
    subprocess.run(['make'], cwd=path, check=True)
def execute(path: Path):
    """Run the ``benchmark-GCC`` binary located in ``path`` with 200 repetitions.

    Args:
        path: directory containing the built ``benchmark-GCC`` executable.

    Raises:
        subprocess.CalledProcessError: if the benchmark exits with a non-zero
            status.
    """
    # Resolve the binary to an absolute path so the lookup does not depend on
    # how a relative program name interacts with ``cwd``; the process itself
    # still runs inside ``path``.
    binary = path.resolve() / 'benchmark-GCC'
    subprocess.run([str(binary), '200'], cwd=path, check=True)
def main():
    """Generate, build and run the example benchmark in the current working directory."""
    workdir = Path.cwd()
    # The three stages run strictly in this order, each on the same directory.
    for stage in (generate, make, execute):
        stage(workdir)
if __name__ == '__main__':
main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment