diff --git a/examle/test.py b/examle/test.py
index d5166789bf31934a06b731675489c64a8db84394..ee5e32bf78fdde057943c13c9b523db671ce7b3d 100755
--- a/examle/test.py
+++ b/examle/test.py
@@ -4,40 +4,52 @@ import subprocess
 import numpy as np
 import sympy as sp
 import pystencils as ps
-from pystencils_benchmark import generate_benchmark
+from pystencils_benchmark import generate_benchmark, Compiler
 from pathlib import Path
 
 
-def generate(path: Path):
+def generate(path: Path, compiler: Compiler):  # path and compiler are forwarded to generate_benchmark
     a, b, c = ps.fields(a=np.ones(4000000), b=np.ones(4000000), c=np.ones(4000000))
     alpha = sp.symbols('alpha')
 
+    kernels = []  # filled in definition order, then passed to generate_benchmark
     @ps.kernel_config(ps.CreateKernelConfig())
     def vadd():
         a[0] @= b[0] + c[0]
-    kernel_vadd = ps.create_kernel(**vadd)
+    kernels.append(ps.create_kernel(**vadd))
+
+    @ps.kernel_config(ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': 'best'}))
+    def vadd_vector():  # same assignment as vadd; only the kernel config differs
+        a[0] @= b[0] + c[0]
+    kernels.append(ps.create_kernel(**vadd_vector))
 
     @ps.kernel_config(ps.CreateKernelConfig())
     def daxpy():
         b[0] @= alpha * a[0] + b[0]
-    kernel_daxpy = ps.create_kernel(**daxpy)
+    kernels.append(ps.create_kernel(**daxpy))
+
+    @ps.kernel_config(ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': 'best'}))
+    def daxpy_vector():  # same assignment as daxpy; only the kernel config differs
+        b[0] @= alpha * a[0] + b[0]
+    kernels.append(ps.create_kernel(**daxpy_vector))
 
-    generate_benchmark([kernel_vadd, kernel_daxpy], path)
+    generate_benchmark(kernels, path, compiler=compiler)
 
 
 def make(path: Path):
-    subprocess.run(['make'])
+    subprocess.run(['make'], cwd=path, check=True)  # build inside `path`; raises CalledProcessError on failure
 
 
-def execute(path: Path):
-    subprocess.run(['./benchmark-GCC', '200'])
+def execute(path: Path, compiler: Compiler):
+    subprocess.run([f'./benchmark-{compiler.name}', '100'], cwd=path, check=True)  # run from `path`, where make was invoked
 
 
 def main():
+    compiler = Compiler.GCCdebug  # passed to generate() and execute(); its .name picks the benchmark-<name> binary
     path = Path.cwd()
-    generate(path)
+    generate(path, compiler)
     make(path)
-    execute(path)
+    execute(path, compiler)
 
 
 if __name__ == '__main__':
diff --git a/pystencils_benchmark/templates/Clang.mk b/pystencils_benchmark/templates/Clang.mk
index 61eee7f897068aea24331bc3a9c9af941a55e12a..be60d3812bde44525848805090dd73aaad707a1d 100644
--- a/pystencils_benchmark/templates/Clang.mk
+++ b/pystencils_benchmark/templates/Clang.mk
@@ -6,14 +6,14 @@ ANSI_CFLAGS += -std=c99
 ANSI_CFLAGS += -pedantic
 ANSI_CFLAGS += -Wextra
 
-CFLAGS   = -O3 -Wno-format -Wall $(ANSI_CFLAGS) -fopenmp
+CFLAGS   = -O3 -Wno-format -Wall $(ANSI_CFLAGS) -fopenmp -march=native
 # More warning pls
 #CFLAGS   += -Wfloat-equal -Wundef -Wshadow -Wpointer-arith -Wcast-align  -Wstrict-overflow=5 -Wwrite-strings -Waggregate-return
 # Maybe too much warnings
 #CFLAGS   += -Wcast-qual -Wswitch-default -Wconversion -Wunreachable-code
 # Specific C flags
 CFLAGS   := $(CFLAGS) -Wstrict-prototypes
-LFLAGS   = -fopenmp
+LFLAGS   = -fopenmp=libomp
 DEFINES  = -D_GNU_SOURCE -DNDEBUG
 INCLUDES =
 LIBS     =
diff --git a/pystencils_benchmark/templates/GCC.mk b/pystencils_benchmark/templates/GCC.mk
index c633858f2f897b5255dd9995391cab1bdaad7caf..46668e6b9d61f6d936ea315c998bfd36ab548933 100644
--- a/pystencils_benchmark/templates/GCC.mk
+++ b/pystencils_benchmark/templates/GCC.mk
@@ -6,7 +6,7 @@ ANSI_CFLAGS += -std=c99
 ANSI_CFLAGS += -pedantic
 ANSI_CFLAGS += -Wextra
 
-CFLAGS   = -O3 -Wno-format -Wall $(ANSI_CFLAGS) -fopenmp
+CFLAGS   = -O3 -Wno-format -Wall $(ANSI_CFLAGS) -fopenmp -march=native
 # More warning pls
 #CFLAGS   += -Wfloat-equal -Wundef -Wshadow -Wpointer-arith -Wcast-align  -Wstrict-overflow=5 -Wwrite-strings -Waggregate-return
 # Maybe too much warnings
diff --git a/pystencils_benchmark/templates/GCCdebug.mk b/pystencils_benchmark/templates/GCCdebug.mk
index f453f0d9a253f37919e0927f74a8e2b8efe72152..47b79c32d75c3ecc0d015110b7cd11b280ee6918 100644
--- a/pystencils_benchmark/templates/GCCdebug.mk
+++ b/pystencils_benchmark/templates/GCCdebug.mk
@@ -6,8 +6,7 @@ ANSI_CFLAGS += -std=c99
 ANSI_CFLAGS += -pedantic
 ANSI_CFLAGS += -Wextra
 
-CFLAGS   = -O0 -g -Wno-format  -Wall $(ANSI_CFLAGS)
-FCFLAGS  =
+CFLAGS   = -O0 -g -Wno-format  -Wall $(ANSI_CFLAGS) -march=native
 LFLAGS   =
 DEFINES  = -D_GNU_SOURCE
 INCLUDES =
diff --git a/pystencils_benchmark/templates/Makefile b/pystencils_benchmark/templates/Makefile
index b9b8cfc6878061d69b55267026bce72adaa21316..98fcaaa19d9a753fd346da9480fd4935d112f2e5 100644
--- a/pystencils_benchmark/templates/Makefile
+++ b/pystencils_benchmark/templates/Makefile
@@ -30,7 +30,7 @@ $(BUILD_DIR)/%.o:  %.c
 
 $(BUILD_DIR)/%.s:  %.c
 	@echo "===>  GENERATE ASM  $@"
-	$(Q)$(CC) -S $(CPPFLAGS) $(CFLAGS) $< -o $@
+	$(Q)$(CC) -S $(CFLAGS) $< -o $@
 
 tags:
 	@echo "===>  GENERATE  TAGS"
diff --git a/pystencils_benchmark/templates/aligned_malloc.h b/pystencils_benchmark/templates/aligned_malloc.h
index 535315713d95a1781087cd07c2e879d3ce9590f7..52693f9b7fffd2a3108574088d9112a1028f7fe5 100644
--- a/pystencils_benchmark/templates/aligned_malloc.h
+++ b/pystencils_benchmark/templates/aligned_malloc.h
@@ -14,6 +14,6 @@ inline void* aligned_malloc(size_t size, size_t align) {
          if(posix_memalign(&result, align, size)) result = 0;
     #endif
     return result;
-};
+}
 
 #endif
diff --git a/pystencils_benchmark/templates/main.c b/pystencils_benchmark/templates/main.c
index 7f8ea9992b7abd4196e1d044128a3b693acb0bb1..1eea7c1e92d232f68271dad5c14fb769c0985220 100644
--- a/pystencils_benchmark/templates/main.c
+++ b/pystencils_benchmark/templates/main.c
@@ -6,6 +6,7 @@
 #include <assert.h>
 
 #include "timing.h"
+#include "aligned_malloc.h"
 
 {%- for include in includes %}
 {{ include }}
@@ -26,7 +27,7 @@ int main(int argc, char **argv)
         {%- for field_name, dataType, elements, size, offset, alignment in kernel.fields %}
         // Initialization {{field_name}}
         {%- if alignment > 0 %}
-        {{dataType}} * {{field_name}} = ({{dataType}} *) aligned_malloc_with_offset({{size}}, {{alignment}}, {{offset}});
+        {{dataType}} * {{field_name}} = ({{dataType}} *) aligned_malloc({{size}}, {{alignment}});//, {{offset}});
         {%- else %}
         {{dataType}} * {{field_name}} = ({{dataType}} *) malloc({{size}});
         {%- endif %}