From 71619b09079996e29bbb7591dc8578f1ff6f348a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=C3=B6nig?= <jan.hoenig@fau.de> Date: Mon, 29 Nov 2021 20:53:50 +0100 Subject: [PATCH] Fixed CI. Improved output. --- .gitignore | 1 + pystencils_benchmark/benchmark.py | 33 ++++++----- pystencils_benchmark/templates/Clang.mk | 4 +- pystencils_benchmark/templates/GCC.mk | 4 +- pystencils_benchmark/templates/GCCdebug.mk | 4 +- .../templates/aligned_malloc.h | 19 ------- pystencils_benchmark/templates/kernel.c | 3 +- pystencils_benchmark/templates/main.c | 56 ++++++++++--------- pystencils_benchmark/templates/timing.h | 2 +- tests/test_benchmark.py | 25 ++++++--- tests/test_benchmark_vector.py | 31 ---------- {examle => ve_example}/test.py | 0 12 files changed, 71 insertions(+), 111 deletions(-) delete mode 100644 pystencils_benchmark/templates/aligned_malloc.h delete mode 100644 tests/test_benchmark_vector.py rename {examle => ve_example}/test.py (100%) diff --git a/.gitignore b/.gitignore index 5ec93d6..611d3b4 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ build/ venv/ pystencils_benchmark.egg-info/ __pycache__ +report.xml diff --git a/pystencils_benchmark/benchmark.py b/pystencils_benchmark/benchmark.py index 25c9044..67cc3d5 100644 --- a/pystencils_benchmark/benchmark.py +++ b/pystencils_benchmark/benchmark.py @@ -15,6 +15,10 @@ from pystencils.integer_functions import modulo_ceil from pystencils_benchmark.enums import Compiler +_env = Environment(loader=PackageLoader('pystencils_benchmark'), undefined=StrictUndefined, keep_trailing_newline=True, + trim_blocks=True, lstrip_blocks=True) + + def generate_benchmark(kernel_asts: Union[KernelFunction, List[KernelFunction]], path: Path = None, *, @@ -55,11 +59,10 @@ def compiler_toolchain(path: Path, compiler: Compiler) -> None: 'compiler': name, } - env = Environment(loader=PackageLoader('pystencils_benchmark'), undefined=StrictUndefined) files = ['Makefile', f'{name}.mk'] for file_name in files: with open(path / file_name, 'w+') as f: - template = env.get_template(file_name).render(**jinja_context) + template = _env.get_template(file_name).render(**jinja_context) f.write(template) @@ -69,10 +72,9 @@ def copy_static_files(path: Path) -> None: include_path = path / 'include' include_path.mkdir(parents=True, exist_ok=True) - env = Environment(loader=PackageLoader('pystencils_benchmark'), undefined=StrictUndefined) - files = ['aligned_malloc.h', 'timing.h', 'timing.c'] + files = ['timing.h', 'timing.c'] for file_name in files: - template = env.get_template(file_name).render() + template = _env.get_template(file_name).render() if file_name[-1] == 'h': target_path = include_path / file_name elif file_name[-1] == 'c': @@ -113,6 +115,7 @@ def kernel_main(kernels_ast: List[KernelFunction], timing: bool = True): np_dtype = get_base_type(p.symbol.dtype).numpy_dtype size_data_type = np_dtype.itemsize + # TODO double check the size computation dim0_size = field.shape[-1] dim1_size = np.prod(field.shape[:-1]) elements = prod(field.shape) @@ -127,17 +130,16 @@ def kernel_main(kernels_ast: List[KernelFunction], timing: bool = True): assert align % np_dtype.itemsize == 0 offset = ((dim0_size + padding_elements + ghost_layers) % kernel.instruction_set['width']) * size_data_type - - fields.append((p.field_name, dtype, elements, size, offset, align)) - call_parameters.append(p.field_name) else: size = elements * size_data_type - fields.append((p.field_name, dtype, elements, size, 0, 0)) - call_parameters.append(p.field_name) + offset = 0 + align = 0 + fields.append((p.field_name, dtype, elements, size, offset, align)) + call_parameters.append(p.field_name) kernels.append(Kernel(name=name, fields=fields, constants=constants, call_parameters=call_parameters, call_argument_list=",".join(call_parameters))) - includes.add(f'#include "{name}.h"\n') + includes.add(name) jinja_context = { 'kernels': kernels, @@ -145,8 +147,7 @@ def kernel_main(kernels_ast: List[KernelFunction], timing: bool = True): 'timing': timing, } - env = Environment(loader=PackageLoader('pystencils_benchmark'), undefined=StrictUndefined) - main = env.get_template('main.c').render(**jinja_context) + main = _env.get_template('main.c').render(**jinja_context) return main @@ -159,8 +160,7 @@ def kernel_header(kernel_ast: KernelFunction, dialect: Backend = Backend.C) -> s 'function_signature': function_signature, } - env = Environment(loader=PackageLoader('pystencils_benchmark'), undefined=StrictUndefined) - header = env.get_template('kernel.h').render(**jinja_context) + header = _env.get_template('kernel.h').render(**jinja_context) return header @@ -176,6 +176,5 @@ def kernel_source(kernel_ast: KernelFunction, dialect: Backend = Backend.C) -> s 'timing': True, } - env = Environment(loader=PackageLoader('pystencils_benchmark'), undefined=StrictUndefined) - source = env.get_template('kernel.c').render(**jinja_context) + source = _env.get_template('kernel.c').render(**jinja_context) return source diff --git a/pystencils_benchmark/templates/Clang.mk b/pystencils_benchmark/templates/Clang.mk index be60d38..96296cc 100644 --- a/pystencils_benchmark/templates/Clang.mk +++ b/pystencils_benchmark/templates/Clang.mk @@ -2,11 +2,11 @@ CC = clang LINKER = $(CC) ANSI_CFLAGS = -ansi -ANSI_CFLAGS += -std=c99 +ANSI_CFLAGS += -std=c11 ANSI_CFLAGS += -pedantic ANSI_CFLAGS += -Wextra -CFLAGS = -O3 -Wno-format -Wall $(ANSI_CFLAGS) -fopenmp -march=native +CFLAGS = -O3 -Wno-format -Wall -Werror $(ANSI_CFLAGS) -fopenmp -march=native # More warning pls #CFLAGS += -Wfloat-equal -Wundef -Wshadow -Wpointer-arith -Wcast-align -Wstrict-overflow=5 -Wwrite-strings -Waggregate-return # Maybe too much warnings diff --git a/pystencils_benchmark/templates/GCC.mk b/pystencils_benchmark/templates/GCC.mk index 46668e6..a65ec46 100644 --- a/pystencils_benchmark/templates/GCC.mk +++ b/pystencils_benchmark/templates/GCC.mk @@ -2,11 +2,11 @@ CC = gcc LINKER = $(CC) ANSI_CFLAGS = -ansi -ANSI_CFLAGS += -std=c99 +ANSI_CFLAGS += -std=c11 ANSI_CFLAGS += -pedantic ANSI_CFLAGS += -Wextra -CFLAGS = -O3 -Wno-format -Wall $(ANSI_CFLAGS) -fopenmp -march=native +CFLAGS = -O3 -Wno-format -Wall -Werror $(ANSI_CFLAGS) -fopenmp -march=native # More warning pls #CFLAGS += -Wfloat-equal -Wundef -Wshadow -Wpointer-arith -Wcast-align -Wstrict-overflow=5 -Wwrite-strings -Waggregate-return # Maybe too much warnings diff --git a/pystencils_benchmark/templates/GCCdebug.mk b/pystencils_benchmark/templates/GCCdebug.mk index 47b79c3..e0d4dd0 100644 --- a/pystencils_benchmark/templates/GCCdebug.mk +++ b/pystencils_benchmark/templates/GCCdebug.mk @@ -2,11 +2,11 @@ CC = gcc LINKER = $(CC) ANSI_CFLAGS = -ansi -ANSI_CFLAGS += -std=c99 +ANSI_CFLAGS += -std=c11 ANSI_CFLAGS += -pedantic ANSI_CFLAGS += -Wextra -CFLAGS = -O0 -g -Wno-format -Wall $(ANSI_CFLAGS) -march=native +CFLAGS = -O0 -g -Wno-format -Wall -Werror $(ANSI_CFLAGS) -march=native LFLAGS = DEFINES = -D_GNU_SOURCE INCLUDES = diff --git a/pystencils_benchmark/templates/aligned_malloc.h b/pystencils_benchmark/templates/aligned_malloc.h deleted file mode 100644 index 52693f9..0000000 --- a/pystencils_benchmark/templates/aligned_malloc.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef _ALIGNED_MALLOC_H -#define _ALIGNED_MALLOC_H -#include <stddef.h> -#include <stdlib.h> - -inline void* aligned_malloc(size_t size, size_t align) { - // Based on http://stackoverflow.com/q/16376942 - void *result; - #if defined(_MSC_VER) - result = _aligned_malloc(size, align); - #elif defined(__INTEL_COMPILER) - result = _mm_malloc(size, align); - #else - if(posix_memalign(&result, align, size)) result = 0; - #endif - return result; -} - -#endif diff --git a/pystencils_benchmark/templates/kernel.c b/pystencils_benchmark/templates/kernel.c index 772321a..9370baa 100644 --- a/pystencils_benchmark/templates/kernel.c +++ b/pystencils_benchmark/templates/kernel.c @@ -1,4 +1,5 @@ -{% for header in headers %}#include {{header}} +{% for header in headers %} +#include {{header}} {% endfor %} #define RESTRICT __restrict__ diff --git a/pystencils_benchmark/templates/main.c b/pystencils_benchmark/templates/main.c index 1eea7c1..a4c462d 100644 --- a/pystencils_benchmark/templates/main.c +++ b/pystencils_benchmark/templates/main.c @@ -1,16 +1,16 @@ -#include <stdlib.h> -#include <stdint.h> -#include <stdbool.h> +#include <assert.h> #include <math.h> +#include <stdbool.h> +#include <stdint.h> #include <stdio.h> -#include <assert.h> +#include <stdlib.h> #include "timing.h" -#include "aligned_malloc.h" -{%- for include in includes %} -{{ include }} -{%- endfor %} +//kernels +{% for include in includes %} +#include "{{ include }}.h" +{% endfor %} #define RESTRICT __restrict__ #define FUNC_PREFIX @@ -22,24 +22,25 @@ int main(int argc, char **argv) return -1; } int n_repeat = atoi(argv[1]); - {%- for kernel in kernels %} - { - {%- for field_name, dataType, elements, size, offset, alignment in kernel.fields %} + {% for kernel in kernels %} + + { // Kernel: {{kernel.name}} + {% for field_name, dataType, elements, size, offset, alignment in kernel.fields %} // Initialization {{field_name}} - {%- if alignment > 0 %} - {{dataType}} * {{field_name}} = ({{dataType}} *) aligned_malloc({{size}}, {{alignment}});//, {{offset}}); - {%- else %} - {{dataType}} * {{field_name}} = ({{dataType}} *) malloc({{size}}); - {%- endif %} + {% if alignment > 0 %} + {{dataType}}* {{field_name}} = ({{dataType}} *) aligned_alloc({{alignment}}, {{size}});//, {{offset}}); + {% else %} + {{dataType}}* {{field_name}} = ({{dataType}} *) malloc({{size}}); + {% endif %} for (unsigned long long i = 0; i < {{elements}}; ++i) {{field_name}}[i] = 0.23; - {%- endfor %} + {% endfor %} - {%- for constantName, dataType in kernel.constants %} + {% for constantName, dataType in kernel.constants %} // Constant {{constantName}} {{dataType}} {{constantName}}; {{constantName}} = 0.23; - {%- endfor %} + {% endfor %} for(int warmup = 1; warmup >= 0; --warmup) { int repeat = 2; @@ -47,25 +48,26 @@ int main(int argc, char **argv) repeat = n_repeat; } - {%- if timing %} + {% if timing %} double wcStartTime, cpuStartTime, wcEndTime, cpuEndTime; timing(&wcStartTime, &cpuStartTime); - {%- endif %} + {% endif %} for (; repeat > 0; --repeat) { {{kernel.name}}({{kernel.call_argument_list}}); } - {%- if timing %} + + {% if timing %} timing(&wcEndTime, &cpuEndTime); + if( warmup == 0) printf("%s\t%e\n", "{{kernel.name}}",(wcEndTime - wcStartTime) / n_repeat ); - {%- endif %} + {% endif %} } - - {%- for field_name, dataType, elements, size, offset, alignment in kernel.fields %} + {% for field_name, dataType, elements, size, offset, alignment in kernel.fields %} free({{field_name}}); - {%- endfor %} + {% endfor %} } - {%- endfor %} + {% endfor %} } diff --git a/pystencils_benchmark/templates/timing.h b/pystencils_benchmark/templates/timing.h index 6c6ff44..ed34c92 100644 --- a/pystencils_benchmark/templates/timing.h +++ b/pystencils_benchmark/templates/timing.h @@ -9,4 +9,4 @@ void timing(double* wcTime, double* cpuTime); -#endif \ No newline at end of file +#endif diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index ea2b3bc..31e115e 100755 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -2,30 +2,37 @@ import subprocess import numpy as np import sympy as sp import tempfile + +import pytest import pystencils as ps from pathlib import Path from pystencils_benchmark import generate_benchmark, Compiler -def test_generate(): +compilers = (Compiler.GCC, Compiler.GCCdebug, Compiler.Clang) +config_kwargs = ({}, {'cpu_vectorize_info': {'instruction_set': 'best'}}) + + +@pytest.mark.parametrize('compiler', compilers) +@pytest.mark.parametrize('config_kwarg', config_kwargs) +def test_generate(compiler, config_kwarg): a, b, c = ps.fields(a=np.ones(4000000), b=np.ones(4000000), c=np.ones(4000000)) alpha = sp.symbols('alpha') - @ps.kernel_config(ps.CreateKernelConfig()) + @ps.kernel_config(ps.CreateKernelConfig(**config_kwarg)) def vadd(): a[0] @= b[0] + c[0] kernel_vadd = ps.create_kernel(**vadd) - @ps.kernel_config(ps.CreateKernelConfig()) + @ps.kernel_config(ps.CreateKernelConfig(**config_kwarg)) def daxpy(): b[0] @= alpha * a[0] + b[0] kernel_daxpy = ps.create_kernel(**daxpy) - for compiler in [Compiler.GCC, Compiler.GCCdebug, Compiler.Clang]: - with tempfile.TemporaryDirectory(dir=Path.cwd()) as temp_dir: - temp_dir = Path(temp_dir) - generate_benchmark([kernel_vadd, kernel_daxpy], temp_dir, compiler=compiler) - subprocess.run(['make', '-C', f'{temp_dir}'], check=True) - subprocess.run([f'{temp_dir}/benchmark-{compiler.name}', '10'], check=True) + with tempfile.TemporaryDirectory(dir=Path.cwd()) as temp_dir: + temp_dir = Path(temp_dir) + generate_benchmark([kernel_vadd, kernel_daxpy], temp_dir, compiler=compiler) + subprocess.run(['make', '-C', f'{temp_dir}'], check=True) + subprocess.run([f'{temp_dir}/benchmark-{compiler.name}', '10'], check=True) diff --git a/tests/test_benchmark_vector.py b/tests/test_benchmark_vector.py deleted file mode 100644 index 6fe2a6e..0000000 --- a/tests/test_benchmark_vector.py +++ /dev/null @@ -1,31 +0,0 @@ -import subprocess -import numpy as np -import sympy as sp -import tempfile -import pystencils as ps -from pathlib import Path -from pystencils_benchmark import generate_benchmark, Compiler - - -def test_generate(): - a, b, c = ps.fields(a=np.ones(4000000), b=np.ones(4000000), c=np.ones(4000000)) - alpha = sp.symbols('alpha') - - @ps.kernel_config(ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': 'best'})) - def vadd(): - a[0] @= b[0] + c[0] - kernel_vadd = ps.create_kernel(**vadd) - - @ps.kernel_config(ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': 'best'})) - def daxpy(): - b[0] @= alpha * a[0] + b[0] - kernel_daxpy = ps.create_kernel(**daxpy) - - for compiler in [Compiler.GCC, Compiler.GCCdebug, Compiler.Clang]: - with tempfile.TemporaryDirectory(dir=Path.cwd()) as temp_dir: - temp_dir = Path(temp_dir) - generate_benchmark([kernel_vadd, kernel_daxpy], temp_dir, compiler=compiler) - subprocess.run(['make', '-C', f'{temp_dir}'], check=True) - subprocess.run([f'{temp_dir}/benchmark-{compiler.name}', '10'], check=True) - - diff --git a/examle/test.py b/ve_example/test.py similarity index 100% rename from examle/test.py rename to ve_example/test.py -- GitLab