diff --git a/.gitignore b/.gitignore index 5ec93d6ef6776bfa9199408192cd3403c15c6191..611d3b46e31d4c0c5d2dff5a826d0a64aa1f1c49 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ build/ venv/ pystencils_benchmark.egg-info/ __pycache__ +report.xml diff --git a/pystencils_benchmark/benchmark.py b/pystencils_benchmark/benchmark.py index 25c90442b4a0c574af8e9377bc50f07a92a2fb38..67cc3d58a911371e9db8ce8374cf69c36b8bffdf 100644 --- a/pystencils_benchmark/benchmark.py +++ b/pystencils_benchmark/benchmark.py @@ -15,6 +15,10 @@ from pystencils.integer_functions import modulo_ceil from pystencils_benchmark.enums import Compiler +_env = Environment(loader=PackageLoader('pystencils_benchmark'), undefined=StrictUndefined, keep_trailing_newline=True, + trim_blocks=True, lstrip_blocks=True) + + def generate_benchmark(kernel_asts: Union[KernelFunction, List[KernelFunction]], path: Path = None, *, @@ -55,11 +59,10 @@ def compiler_toolchain(path: Path, compiler: Compiler) -> None: 'compiler': name, } - env = Environment(loader=PackageLoader('pystencils_benchmark'), undefined=StrictUndefined) files = ['Makefile', f'{name}.mk'] for file_name in files: with open(path / file_name, 'w+') as f: - template = env.get_template(file_name).render(**jinja_context) + template = _env.get_template(file_name).render(**jinja_context) f.write(template) @@ -69,10 +72,9 @@ def copy_static_files(path: Path) -> None: include_path = path / 'include' include_path.mkdir(parents=True, exist_ok=True) - env = Environment(loader=PackageLoader('pystencils_benchmark'), undefined=StrictUndefined) - files = ['aligned_malloc.h', 'timing.h', 'timing.c'] + files = ['timing.h', 'timing.c'] for file_name in files: - template = env.get_template(file_name).render() + template = _env.get_template(file_name).render() if file_name[-1] == 'h': target_path = include_path / file_name elif file_name[-1] == 'c': @@ -113,6 +115,7 @@ def kernel_main(kernels_ast: List[KernelFunction], timing: bool = True): np_dtype = get_base_type(p.symbol.dtype).numpy_dtype size_data_type = np_dtype.itemsize + # TODO double check the size computation dim0_size = field.shape[-1] dim1_size = np.prod(field.shape[:-1]) elements = prod(field.shape) @@ -127,17 +130,16 @@ def kernel_main(kernels_ast: List[KernelFunction], timing: bool = True): assert align % np_dtype.itemsize == 0 offset = ((dim0_size + padding_elements + ghost_layers) % kernel.instruction_set['width']) * size_data_type - - fields.append((p.field_name, dtype, elements, size, offset, align)) - call_parameters.append(p.field_name) else: size = elements * size_data_type - fields.append((p.field_name, dtype, elements, size, 0, 0)) - call_parameters.append(p.field_name) + offset = 0 + align = 0 + fields.append((p.field_name, dtype, elements, size, offset, align)) + call_parameters.append(p.field_name) kernels.append(Kernel(name=name, fields=fields, constants=constants, call_parameters=call_parameters, call_argument_list=",".join(call_parameters))) - includes.add(f'#include "{name}.h"\n') + includes.add(name) jinja_context = { 'kernels': kernels, @@ -145,8 +147,7 @@ def kernel_main(kernels_ast: List[KernelFunction], timing: bool = True): 'timing': timing, } - env = Environment(loader=PackageLoader('pystencils_benchmark'), undefined=StrictUndefined) - main = env.get_template('main.c').render(**jinja_context) + main = _env.get_template('main.c').render(**jinja_context) return main @@ -159,8 +160,7 @@ def kernel_header(kernel_ast: KernelFunction, dialect: Backend = Backend.C) -> s 'function_signature': function_signature, } - env = Environment(loader=PackageLoader('pystencils_benchmark'), undefined=StrictUndefined) - header = env.get_template('kernel.h').render(**jinja_context) + header = _env.get_template('kernel.h').render(**jinja_context) return header @@ -176,6 +176,5 @@ def kernel_source(kernel_ast: KernelFunction, dialect: Backend = Backend.C) -> s 'timing': True, } - env = Environment(loader=PackageLoader('pystencils_benchmark'), undefined=StrictUndefined) - source = env.get_template('kernel.c').render(**jinja_context) + source = _env.get_template('kernel.c').render(**jinja_context) return source diff --git a/pystencils_benchmark/templates/Clang.mk b/pystencils_benchmark/templates/Clang.mk index be60d3812bde44525848805090dd73aaad707a1d..96296cc6d0aff6545ee8457aa601c86265b868a4 100644 --- a/pystencils_benchmark/templates/Clang.mk +++ b/pystencils_benchmark/templates/Clang.mk @@ -2,11 +2,11 @@ CC = clang LINKER = $(CC) ANSI_CFLAGS = -ansi -ANSI_CFLAGS += -std=c99 +ANSI_CFLAGS += -std=c11 ANSI_CFLAGS += -pedantic ANSI_CFLAGS += -Wextra -CFLAGS = -O3 -Wno-format -Wall $(ANSI_CFLAGS) -fopenmp -march=native +CFLAGS = -O3 -Wno-format -Wall -Werror $(ANSI_CFLAGS) -fopenmp -march=native # More warning pls #CFLAGS += -Wfloat-equal -Wundef -Wshadow -Wpointer-arith -Wcast-align -Wstrict-overflow=5 -Wwrite-strings -Waggregate-return # Maybe too much warnings diff --git a/pystencils_benchmark/templates/GCC.mk b/pystencils_benchmark/templates/GCC.mk index 46668e6b9d61f6d936ea315c998bfd36ab548933..a65ec464c663bfe23a90e5756203ede9eeb4d8de 100644 --- a/pystencils_benchmark/templates/GCC.mk +++ b/pystencils_benchmark/templates/GCC.mk @@ -2,11 +2,11 @@ CC = gcc LINKER = $(CC) ANSI_CFLAGS = -ansi -ANSI_CFLAGS += -std=c99 +ANSI_CFLAGS += -std=c11 ANSI_CFLAGS += -pedantic ANSI_CFLAGS += -Wextra -CFLAGS = -O3 -Wno-format -Wall $(ANSI_CFLAGS) -fopenmp -march=native +CFLAGS = -O3 -Wno-format -Wall -Werror $(ANSI_CFLAGS) -fopenmp -march=native # More warning pls #CFLAGS += -Wfloat-equal -Wundef -Wshadow -Wpointer-arith -Wcast-align -Wstrict-overflow=5 -Wwrite-strings -Waggregate-return # Maybe too much warnings diff --git a/pystencils_benchmark/templates/GCCdebug.mk b/pystencils_benchmark/templates/GCCdebug.mk index 47b79c32d75c3ecc0d015110b7cd11b280ee6918..e0d4dd000c00c06f33d156be474d8435bbf77c90 100644 --- a/pystencils_benchmark/templates/GCCdebug.mk +++ b/pystencils_benchmark/templates/GCCdebug.mk @@ -2,11 +2,11 @@ CC = gcc LINKER = $(CC) ANSI_CFLAGS = -ansi -ANSI_CFLAGS += -std=c99 +ANSI_CFLAGS += -std=c11 ANSI_CFLAGS += -pedantic ANSI_CFLAGS += -Wextra -CFLAGS = -O0 -g -Wno-format -Wall $(ANSI_CFLAGS) -march=native +CFLAGS = -O0 -g -Wno-format -Wall -Werror $(ANSI_CFLAGS) -march=native LFLAGS = DEFINES = -D_GNU_SOURCE INCLUDES = diff --git a/pystencils_benchmark/templates/aligned_malloc.h b/pystencils_benchmark/templates/aligned_malloc.h deleted file mode 100644 index 52693f9b7fffd2a3108574088d9112a1028f7fe5..0000000000000000000000000000000000000000 --- a/pystencils_benchmark/templates/aligned_malloc.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef _ALIGNED_MALLOC_H -#define _ALIGNED_MALLOC_H -#include <stddef.h> -#include <stdlib.h> - -inline void* aligned_malloc(size_t size, size_t align) { - // Based on http://stackoverflow.com/q/16376942 - void *result; - #if defined(_MSC_VER) - result = _aligned_malloc(size, align); - #elif defined(__INTEL_COMPILER) - result = _mm_malloc(size, align); - #else - if(posix_memalign(&result, align, size)) result = 0; - #endif - return result; -} - -#endif diff --git a/pystencils_benchmark/templates/kernel.c b/pystencils_benchmark/templates/kernel.c index 772321ac3fd822c40e68c528d652987008b49b7b..9370baac95669d61074a4236e985a5eff22cff56 100644 --- a/pystencils_benchmark/templates/kernel.c +++ b/pystencils_benchmark/templates/kernel.c @@ -1,4 +1,5 @@ -{% for header in headers %}#include {{header}} +{% for header in headers %} +#include {{header}} {% endfor %} #define RESTRICT __restrict__ diff --git a/pystencils_benchmark/templates/main.c b/pystencils_benchmark/templates/main.c index 1eea7c1e92d232f68271dad5c14fb769c0985220..a4c462d598ba4c88ad0f63c0afb66d1962c07b7f 100644 --- a/pystencils_benchmark/templates/main.c +++ b/pystencils_benchmark/templates/main.c @@ -1,16 +1,16 @@ -#include <stdlib.h> -#include <stdint.h> -#include <stdbool.h> +#include <assert.h> #include <math.h> +#include <stdbool.h> +#include <stdint.h> #include <stdio.h> -#include <assert.h> +#include <stdlib.h> #include "timing.h" -#include "aligned_malloc.h" -{%- for include in includes %} -{{ include }} -{%- endfor %} +//kernels +{% for include in includes %} +#include "{{ include }}.h" +{% endfor %} #define RESTRICT __restrict__ #define FUNC_PREFIX @@ -22,24 +22,25 @@ int main(int argc, char **argv) return -1; } int n_repeat = atoi(argv[1]); - {%- for kernel in kernels %} - { - {%- for field_name, dataType, elements, size, offset, alignment in kernel.fields %} + {% for kernel in kernels %} + + { // Kernel: {{kernel.name}} + {% for field_name, dataType, elements, size, offset, alignment in kernel.fields %} // Initialization {{field_name}} - {%- if alignment > 0 %} - {{dataType}} * {{field_name}} = ({{dataType}} *) aligned_malloc({{size}}, {{alignment}});//, {{offset}}); - {%- else %} - {{dataType}} * {{field_name}} = ({{dataType}} *) malloc({{size}}); - {%- endif %} + {% if alignment > 0 %} + {{dataType}}* {{field_name}} = ({{dataType}} *) aligned_alloc({{alignment}}, {{size}});//, {{offset}}); + {% else %} + {{dataType}}* {{field_name}} = ({{dataType}} *) malloc({{size}}); + {% endif %} for (unsigned long long i = 0; i < {{elements}}; ++i) {{field_name}}[i] = 0.23; - {%- endfor %} + {% endfor %} - {%- for constantName, dataType in kernel.constants %} + {% for constantName, dataType in kernel.constants %} // Constant {{constantName}} {{dataType}} {{constantName}}; {{constantName}} = 0.23; - {%- endfor %} + {% endfor %} for(int warmup = 1; warmup >= 0; --warmup) { int repeat = 2; @@ -47,25 +48,26 @@ int main(int argc, char **argv) repeat = n_repeat; } - {%- if timing %} + {% if timing %} double wcStartTime, cpuStartTime, wcEndTime, cpuEndTime; timing(&wcStartTime, &cpuStartTime); - {%- endif %} + {% endif %} for (; repeat > 0; --repeat) { {{kernel.name}}({{kernel.call_argument_list}}); } - {%- if timing %} + + {% if timing %} timing(&wcEndTime, &cpuEndTime); + if( warmup == 0) printf("%s\t%e\n", "{{kernel.name}}",(wcEndTime - wcStartTime) / n_repeat ); - {%- endif %} + {% endif %} } - - {%- for field_name, dataType, elements, size, offset, alignment in kernel.fields %} + {% for field_name, dataType, elements, size, offset, alignment in kernel.fields %} free({{field_name}}); - {%- endfor %} + {% endfor %} } - {%- endfor %} + {% endfor %} } diff --git a/pystencils_benchmark/templates/timing.h b/pystencils_benchmark/templates/timing.h index 6c6ff440cfb40ab0aa5271a9874de73cfd47ed70..ed34c92c454b0ef136f6dfd70b93c32c27f80df4 100644 --- a/pystencils_benchmark/templates/timing.h +++ b/pystencils_benchmark/templates/timing.h @@ -9,4 +9,4 @@ void timing(double* wcTime, double* cpuTime); -#endif \ No newline at end of file +#endif diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index ea2b3bc656f7b298249966a6b040fe2fa140d0f6..31e115e80e95325784bcf281ae848cf98c9f870e 100755 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -2,30 +2,37 @@ import subprocess import numpy as np import sympy as sp import tempfile + +import pytest import pystencils as ps from pathlib import Path from pystencils_benchmark import generate_benchmark, Compiler -def test_generate(): +compilers = (Compiler.GCC, Compiler.GCCdebug, Compiler.Clang) +config_kwargs = ({}, {'cpu_vectorize_info': {'instruction_set': 'best'}}) + + +@pytest.mark.parametrize('compiler', compilers) +@pytest.mark.parametrize('config_kwarg', config_kwargs) +def test_generate(compiler, config_kwarg): a, b, c = ps.fields(a=np.ones(4000000), b=np.ones(4000000), c=np.ones(4000000)) alpha = sp.symbols('alpha') - @ps.kernel_config(ps.CreateKernelConfig()) + @ps.kernel_config(ps.CreateKernelConfig(**config_kwarg)) def vadd(): a[0] @= b[0] + c[0] kernel_vadd = ps.create_kernel(**vadd) - @ps.kernel_config(ps.CreateKernelConfig()) + @ps.kernel_config(ps.CreateKernelConfig(**config_kwarg)) def daxpy(): b[0] @= alpha * a[0] + b[0] kernel_daxpy = ps.create_kernel(**daxpy) - for compiler in [Compiler.GCC, Compiler.GCCdebug, Compiler.Clang]: - with tempfile.TemporaryDirectory(dir=Path.cwd()) as temp_dir: - temp_dir = Path(temp_dir) - generate_benchmark([kernel_vadd, kernel_daxpy], temp_dir, compiler=compiler) - subprocess.run(['make', '-C', f'{temp_dir}'], check=True) - subprocess.run([f'{temp_dir}/benchmark-{compiler.name}', '10'], check=True) + with tempfile.TemporaryDirectory(dir=Path.cwd()) as temp_dir: + temp_dir = Path(temp_dir) + generate_benchmark([kernel_vadd, kernel_daxpy], temp_dir, compiler=compiler) + subprocess.run(['make', '-C', f'{temp_dir}'], check=True) + subprocess.run([f'{temp_dir}/benchmark-{compiler.name}', '10'], check=True) diff --git a/tests/test_benchmark_vector.py b/tests/test_benchmark_vector.py deleted file mode 100644 index 6fe2a6ebb1c7a4dc66912dda8a275689a0261f05..0000000000000000000000000000000000000000 --- a/tests/test_benchmark_vector.py +++ /dev/null @@ -1,31 +0,0 @@ -import subprocess -import numpy as np -import sympy as sp -import tempfile -import pystencils as ps -from pathlib import Path -from pystencils_benchmark import generate_benchmark, Compiler - - -def test_generate(): - a, b, c = ps.fields(a=np.ones(4000000), b=np.ones(4000000), c=np.ones(4000000)) - alpha = sp.symbols('alpha') - - @ps.kernel_config(ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': 'best'})) - def vadd(): - a[0] @= b[0] + c[0] - kernel_vadd = ps.create_kernel(**vadd) - - @ps.kernel_config(ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': 'best'})) - def daxpy(): - b[0] @= alpha * a[0] + b[0] - kernel_daxpy = ps.create_kernel(**daxpy) - - for compiler in [Compiler.GCC, Compiler.GCCdebug, Compiler.Clang]: - with tempfile.TemporaryDirectory(dir=Path.cwd()) as temp_dir: - temp_dir = Path(temp_dir) - generate_benchmark([kernel_vadd, kernel_daxpy], temp_dir, compiler=compiler) - subprocess.run(['make', '-C', f'{temp_dir}'], check=True) - subprocess.run([f'{temp_dir}/benchmark-{compiler.name}', '10'], check=True) - - diff --git a/examle/test.py b/ve_example/test.py similarity index 100% rename from examle/test.py rename to ve_example/test.py