From 71619b09079996e29bbb7591dc8578f1ff6f348a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20H=C3=B6nig?= <jan.hoenig@fau.de>
Date: Mon, 29 Nov 2021 20:53:50 +0100
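Subject: [PATCH] Fixed CI. Improved output.

Share one module-level Jinja environment (StrictUndefined, trim_blocks,
lstrip_blocks, keep_trailing_newline) across all template renders instead
of building a new one in every function.

Drop the bundled aligned_malloc.h and allocate aligned fields with C11
aligned_alloc; the compiler templates now build with -std=c11 and -Werror.

Tidy the whitespace of the generated main.c and kernel.c, let main.c build
the kernel #include lines from the kernel names, and add a trailing newline
to timing.h.

Merge the scalar and vectorized benchmark tests into one test that is
parametrized over compiler and kernel config. Ignore report.xml and rename
examle/ to ve_example/.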
---
 .gitignore                                    |  1 +
 pystencils_benchmark/benchmark.py             | 33 ++++++-----
 pystencils_benchmark/templates/Clang.mk       |  4 +-
 pystencils_benchmark/templates/GCC.mk         |  4 +-
 pystencils_benchmark/templates/GCCdebug.mk    |  4 +-
 .../templates/aligned_malloc.h                | 19 -------
 pystencils_benchmark/templates/kernel.c       |  3 +-
 pystencils_benchmark/templates/main.c         | 56 ++++++++++---------
 pystencils_benchmark/templates/timing.h       |  2 +-
 tests/test_benchmark.py                       | 25 ++++++---
 tests/test_benchmark_vector.py                | 31 ----------
 {examle => ve_example}/test.py                |  0
 12 files changed, 71 insertions(+), 111 deletions(-)
 delete mode 100644 pystencils_benchmark/templates/aligned_malloc.h
 delete mode 100644 tests/test_benchmark_vector.py
 rename {examle => ve_example}/test.py (100%)

diff --git a/.gitignore b/.gitignore
index 5ec93d6..611d3b4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@ build/
 venv/
 pystencils_benchmark.egg-info/
 __pycache__
+report.xml
diff --git a/pystencils_benchmark/benchmark.py b/pystencils_benchmark/benchmark.py
index 25c9044..67cc3d5 100644
--- a/pystencils_benchmark/benchmark.py
+++ b/pystencils_benchmark/benchmark.py
@@ -15,6 +15,10 @@ from pystencils.integer_functions import modulo_ceil
 from pystencils_benchmark.enums import Compiler
 
 
+_env = Environment(loader=PackageLoader('pystencils_benchmark'), undefined=StrictUndefined, keep_trailing_newline=True,
+                   trim_blocks=True, lstrip_blocks=True)
+
+
 def generate_benchmark(kernel_asts: Union[KernelFunction, List[KernelFunction]],
                        path: Path = None,
                        *,
@@ -55,11 +59,10 @@ def compiler_toolchain(path: Path, compiler: Compiler) -> None:
         'compiler': name,
     }
 
-    env = Environment(loader=PackageLoader('pystencils_benchmark'), undefined=StrictUndefined)
     files = ['Makefile', f'{name}.mk']
     for file_name in files:
         with open(path / file_name, 'w+') as f:
-            template = env.get_template(file_name).render(**jinja_context)
+            template = _env.get_template(file_name).render(**jinja_context)
             f.write(template)
 
 
@@ -69,10 +72,9 @@ def copy_static_files(path: Path) -> None:
     include_path = path / 'include'
     include_path.mkdir(parents=True, exist_ok=True)
 
-    env = Environment(loader=PackageLoader('pystencils_benchmark'), undefined=StrictUndefined)
-    files = ['aligned_malloc.h', 'timing.h', 'timing.c']
+    files = ['timing.h', 'timing.c']
     for file_name in files:
-        template = env.get_template(file_name).render()
+        template = _env.get_template(file_name).render()
         if file_name[-1] == 'h':
             target_path = include_path / file_name
         elif file_name[-1] == 'c':
@@ -113,6 +115,7 @@ def kernel_main(kernels_ast: List[KernelFunction], timing: bool = True):
                 np_dtype = get_base_type(p.symbol.dtype).numpy_dtype
                 size_data_type = np_dtype.itemsize
 
+                # TODO: double-check the size computation below (dim0_size, dim1_size, elements)
                 dim0_size = field.shape[-1]
                 dim1_size = np.prod(field.shape[:-1])
                 elements = prod(field.shape)
@@ -127,17 +130,16 @@ def kernel_main(kernels_ast: List[KernelFunction], timing: bool = True):
 
                     assert align % np_dtype.itemsize == 0
                     offset = ((dim0_size + padding_elements + ghost_layers) % kernel.instruction_set['width']) * size_data_type
-
-                    fields.append((p.field_name, dtype, elements, size, offset, align))
-                    call_parameters.append(p.field_name)
                 else:
                     size = elements * size_data_type
-                    fields.append((p.field_name, dtype, elements, size, 0, 0))
-                    call_parameters.append(p.field_name)
+                    offset = 0
+                    align = 0
+                fields.append((p.field_name, dtype, elements, size, offset, align))
+                call_parameters.append(p.field_name)
         kernels.append(Kernel(name=name, fields=fields, constants=constants, call_parameters=call_parameters,
                               call_argument_list=",".join(call_parameters)))
 
-        includes.add(f'#include "{name}.h"\n')
+        includes.add(name)
 
     jinja_context = {
         'kernels': kernels,
@@ -145,8 +147,7 @@ def kernel_main(kernels_ast: List[KernelFunction], timing: bool = True):
         'timing': timing,
     }
 
-    env = Environment(loader=PackageLoader('pystencils_benchmark'), undefined=StrictUndefined)
-    main = env.get_template('main.c').render(**jinja_context)
+    main = _env.get_template('main.c').render(**jinja_context)
     return main
 
 
@@ -159,8 +160,7 @@ def kernel_header(kernel_ast: KernelFunction, dialect: Backend = Backend.C) -> s
         'function_signature': function_signature,
     }
 
-    env = Environment(loader=PackageLoader('pystencils_benchmark'), undefined=StrictUndefined)
-    header = env.get_template('kernel.h').render(**jinja_context)
+    header = _env.get_template('kernel.h').render(**jinja_context)
     return header
 
 
@@ -176,6 +176,5 @@ def kernel_source(kernel_ast: KernelFunction, dialect: Backend = Backend.C) -> s
         'timing': True,
     }
 
-    env = Environment(loader=PackageLoader('pystencils_benchmark'), undefined=StrictUndefined)
-    source = env.get_template('kernel.c').render(**jinja_context)
+    source = _env.get_template('kernel.c').render(**jinja_context)
     return source
diff --git a/pystencils_benchmark/templates/Clang.mk b/pystencils_benchmark/templates/Clang.mk
index be60d38..96296cc 100644
--- a/pystencils_benchmark/templates/Clang.mk
+++ b/pystencils_benchmark/templates/Clang.mk
@@ -2,11 +2,11 @@ CC  = clang
 LINKER = $(CC)
 
 ANSI_CFLAGS  = -ansi
-ANSI_CFLAGS += -std=c99
+ANSI_CFLAGS += -std=c11
 ANSI_CFLAGS += -pedantic
 ANSI_CFLAGS += -Wextra
 
-CFLAGS   = -O3 -Wno-format -Wall $(ANSI_CFLAGS) -fopenmp -march=native
+CFLAGS   = -O3 -Wno-format -Wall -Werror $(ANSI_CFLAGS) -fopenmp -march=native
 # More warning pls
 #CFLAGS   += -Wfloat-equal -Wundef -Wshadow -Wpointer-arith -Wcast-align  -Wstrict-overflow=5 -Wwrite-strings -Waggregate-return
 # Maybe too much warnings
diff --git a/pystencils_benchmark/templates/GCC.mk b/pystencils_benchmark/templates/GCC.mk
index 46668e6..a65ec46 100644
--- a/pystencils_benchmark/templates/GCC.mk
+++ b/pystencils_benchmark/templates/GCC.mk
@@ -2,11 +2,11 @@ CC  = gcc
 LINKER = $(CC)
 
 ANSI_CFLAGS  = -ansi
-ANSI_CFLAGS += -std=c99
+ANSI_CFLAGS += -std=c11
 ANSI_CFLAGS += -pedantic
 ANSI_CFLAGS += -Wextra
 
-CFLAGS   = -O3 -Wno-format -Wall $(ANSI_CFLAGS) -fopenmp -march=native
+CFLAGS   = -O3 -Wno-format -Wall -Werror $(ANSI_CFLAGS) -fopenmp -march=native
 # More warning pls
 #CFLAGS   += -Wfloat-equal -Wundef -Wshadow -Wpointer-arith -Wcast-align  -Wstrict-overflow=5 -Wwrite-strings -Waggregate-return
 # Maybe too much warnings
diff --git a/pystencils_benchmark/templates/GCCdebug.mk b/pystencils_benchmark/templates/GCCdebug.mk
index 47b79c3..e0d4dd0 100644
--- a/pystencils_benchmark/templates/GCCdebug.mk
+++ b/pystencils_benchmark/templates/GCCdebug.mk
@@ -2,11 +2,11 @@ CC  = gcc
 LINKER = $(CC)
 
 ANSI_CFLAGS  = -ansi
-ANSI_CFLAGS += -std=c99
+ANSI_CFLAGS += -std=c11
 ANSI_CFLAGS += -pedantic
 ANSI_CFLAGS += -Wextra
 
-CFLAGS   = -O0 -g -Wno-format  -Wall $(ANSI_CFLAGS) -march=native
+CFLAGS   = -O0 -g -Wno-format -Wall -Werror $(ANSI_CFLAGS) -march=native
 LFLAGS   =
 DEFINES  = -D_GNU_SOURCE
 INCLUDES =
diff --git a/pystencils_benchmark/templates/aligned_malloc.h b/pystencils_benchmark/templates/aligned_malloc.h
deleted file mode 100644
index 52693f9..0000000
--- a/pystencils_benchmark/templates/aligned_malloc.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef _ALIGNED_MALLOC_H
-#define _ALIGNED_MALLOC_H
-#include <stddef.h>
-#include <stdlib.h>
-
-inline void* aligned_malloc(size_t size, size_t align) {
-    // Based on http://stackoverflow.com/q/16376942
-    void *result;
-    #if defined(_MSC_VER)
-        result = _aligned_malloc(size, align);
-    #elif defined(__INTEL_COMPILER)
-         result = _mm_malloc(size, align);
-    #else
-         if(posix_memalign(&result, align, size)) result = 0;
-    #endif
-    return result;
-}
-
-#endif
diff --git a/pystencils_benchmark/templates/kernel.c b/pystencils_benchmark/templates/kernel.c
index 772321a..9370baa 100644
--- a/pystencils_benchmark/templates/kernel.c
+++ b/pystencils_benchmark/templates/kernel.c
@@ -1,4 +1,5 @@
-{% for header in headers %}#include {{header}}
+{% for header in headers %}
+#include {{header}}
 {% endfor %}
 
 #define RESTRICT __restrict__
diff --git a/pystencils_benchmark/templates/main.c b/pystencils_benchmark/templates/main.c
index 1eea7c1..a4c462d 100644
--- a/pystencils_benchmark/templates/main.c
+++ b/pystencils_benchmark/templates/main.c
@@ -1,16 +1,16 @@
-#include <stdlib.h>
-#include <stdint.h>
-#include <stdbool.h>
+#include <assert.h>
 #include <math.h>
+#include <stdbool.h>
+#include <stdint.h>
 #include <stdio.h>
-#include <assert.h>
+#include <stdlib.h>
 
 #include "timing.h"
-#include "aligned_malloc.h"
 
-{%- for include in includes %}
-{{ include }}
-{%- endfor %}
+// Generated kernel headers
+{% for include in includes %}
+#include "{{ include }}.h"
+{% endfor %}
 
 #define RESTRICT __restrict__
 #define FUNC_PREFIX
@@ -22,24 +22,25 @@ int main(int argc, char **argv)
         return -1;
     }
     int n_repeat = atoi(argv[1]);
-    {%- for kernel in kernels %}
-    {
-        {%- for field_name, dataType, elements, size, offset, alignment in kernel.fields %}
+    {% for kernel in kernels %}
+
+    { // Kernel: {{kernel.name}}
+        {% for field_name, dataType, elements, size, offset, alignment in kernel.fields %}
         // Initialization {{field_name}}
-        {%- if alignment > 0 %}
-        {{dataType}} * {{field_name}} = ({{dataType}} *) aligned_malloc({{size}}, {{alignment}});//, {{offset}});
-        {%- else %}
-        {{dataType}} * {{field_name}} = ({{dataType}} *) malloc({{size}});
-        {%- endif %}
+        {% if alignment > 0 %}
+        {{dataType}}* {{field_name}} = ({{dataType}} *) aligned_alloc({{alignment}}, {{size}});//, {{offset}});
+        {% else %}
+        {{dataType}}* {{field_name}} = ({{dataType}} *) malloc({{size}});
+        {% endif %}
         for (unsigned long long i = 0; i < {{elements}}; ++i)
             {{field_name}}[i] = 0.23;
-        {%- endfor %}
+        {% endfor %}
 
-        {%- for constantName, dataType in kernel.constants %}
+        {% for constantName, dataType in kernel.constants %}
         // Constant {{constantName}}
         {{dataType}} {{constantName}};
         {{constantName}} = 0.23;
-        {%- endfor %}
+        {% endfor %}
 
         for(int warmup = 1; warmup >= 0; --warmup) {
             int repeat = 2;
@@ -47,25 +48,26 @@ int main(int argc, char **argv)
                 repeat = n_repeat;
             }
 
-            {%- if timing %}
+            {% if timing %}
             double wcStartTime, cpuStartTime, wcEndTime, cpuEndTime;
             timing(&wcStartTime, &cpuStartTime);
-            {%- endif %}
+            {% endif %}
 
             for (; repeat > 0; --repeat)
             {
                 {{kernel.name}}({{kernel.call_argument_list}});
             }
-            {%- if timing %}
+
+            {% if timing %}
             timing(&wcEndTime, &cpuEndTime);
+
             if( warmup == 0)
                 printf("%s\t%e\n", "{{kernel.name}}",(wcEndTime - wcStartTime) / n_repeat );
-            {%- endif %}
+            {% endif %}
         }
-
-        {%- for field_name, dataType, elements, size, offset, alignment in kernel.fields %}
+        {% for field_name, dataType, elements, size, offset, alignment in kernel.fields %}
         free({{field_name}});
-        {%- endfor %}
+        {% endfor %}
     }
-    {%- endfor %}
+    {% endfor %}
 }
diff --git a/pystencils_benchmark/templates/timing.h b/pystencils_benchmark/templates/timing.h
index 6c6ff44..ed34c92 100644
--- a/pystencils_benchmark/templates/timing.h
+++ b/pystencils_benchmark/templates/timing.h
@@ -9,4 +9,4 @@
 
 void timing(double* wcTime, double* cpuTime);
 
-#endif
\ No newline at end of file
+#endif
diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py
index ea2b3bc..31e115e 100755
--- a/tests/test_benchmark.py
+++ b/tests/test_benchmark.py
@@ -2,30 +2,37 @@ import subprocess
 import numpy as np
 import sympy as sp
 import tempfile
+
+import pytest
 import pystencils as ps
 from pathlib import Path
 from pystencils_benchmark import generate_benchmark, Compiler
 
 
-def test_generate():
+compilers = (Compiler.GCC, Compiler.GCCdebug, Compiler.Clang)
+config_kwargs = ({}, {'cpu_vectorize_info': {'instruction_set': 'best'}})
+
+
+@pytest.mark.parametrize('compiler', compilers)
+@pytest.mark.parametrize('config_kwarg', config_kwargs)
+def test_generate(compiler, config_kwarg):
     a, b, c = ps.fields(a=np.ones(4000000), b=np.ones(4000000), c=np.ones(4000000))
     alpha = sp.symbols('alpha')
 
-    @ps.kernel_config(ps.CreateKernelConfig())
+    @ps.kernel_config(ps.CreateKernelConfig(**config_kwarg))
     def vadd():
         a[0] @= b[0] + c[0]
     kernel_vadd = ps.create_kernel(**vadd)
 
-    @ps.kernel_config(ps.CreateKernelConfig())
+    @ps.kernel_config(ps.CreateKernelConfig(**config_kwarg))
     def daxpy():
         b[0] @= alpha * a[0] + b[0]
     kernel_daxpy = ps.create_kernel(**daxpy)
 
-    for compiler in [Compiler.GCC, Compiler.GCCdebug, Compiler.Clang]:
-        with tempfile.TemporaryDirectory(dir=Path.cwd()) as temp_dir:
-            temp_dir = Path(temp_dir)
-            generate_benchmark([kernel_vadd, kernel_daxpy], temp_dir, compiler=compiler)
-            subprocess.run(['make', '-C', f'{temp_dir}'], check=True)
-            subprocess.run([f'{temp_dir}/benchmark-{compiler.name}', '10'], check=True)
+    with tempfile.TemporaryDirectory(dir=Path.cwd()) as temp_dir:
+        temp_dir = Path(temp_dir)
+        generate_benchmark([kernel_vadd, kernel_daxpy], temp_dir, compiler=compiler)
+        subprocess.run(['make', '-C', f'{temp_dir}'], check=True)
+        subprocess.run([f'{temp_dir}/benchmark-{compiler.name}', '10'], check=True)
 
 
diff --git a/tests/test_benchmark_vector.py b/tests/test_benchmark_vector.py
deleted file mode 100644
index 6fe2a6e..0000000
--- a/tests/test_benchmark_vector.py
+++ /dev/null
@@ -1,31 +0,0 @@
-import subprocess
-import numpy as np
-import sympy as sp
-import tempfile
-import pystencils as ps
-from pathlib import Path
-from pystencils_benchmark import generate_benchmark, Compiler
-
-
-def test_generate():
-    a, b, c = ps.fields(a=np.ones(4000000), b=np.ones(4000000), c=np.ones(4000000))
-    alpha = sp.symbols('alpha')
-
-    @ps.kernel_config(ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': 'best'}))
-    def vadd():
-        a[0] @= b[0] + c[0]
-    kernel_vadd = ps.create_kernel(**vadd)
-
-    @ps.kernel_config(ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': 'best'}))
-    def daxpy():
-        b[0] @= alpha * a[0] + b[0]
-    kernel_daxpy = ps.create_kernel(**daxpy)
-
-    for compiler in [Compiler.GCC, Compiler.GCCdebug, Compiler.Clang]:
-        with tempfile.TemporaryDirectory(dir=Path.cwd()) as temp_dir:
-            temp_dir = Path(temp_dir)
-            generate_benchmark([kernel_vadd, kernel_daxpy], temp_dir, compiler=compiler)
-            subprocess.run(['make', '-C', f'{temp_dir}'], check=True)
-            subprocess.run([f'{temp_dir}/benchmark-{compiler.name}', '10'], check=True)
-
-
diff --git a/examle/test.py b/ve_example/test.py
similarity index 100%
rename from examle/test.py
rename to ve_example/test.py
-- 
GitLab