diff --git a/examle/test.py b/examle/test.py index d5166789bf31934a06b731675489c64a8db84394..ee5e32bf78fdde057943c13c9b523db671ce7b3d 100755 --- a/examle/test.py +++ b/examle/test.py @@ -4,40 +4,52 @@ import subprocess import numpy as np import sympy as sp import pystencils as ps -from pystencils_benchmark import generate_benchmark +from pystencils_benchmark import generate_benchmark, Compiler from pathlib import Path -def generate(path: Path): +def generate(path: Path, compiler: Compiler): a, b, c = ps.fields(a=np.ones(4000000), b=np.ones(4000000), c=np.ones(4000000)) alpha = sp.symbols('alpha') + kernels = [] @ps.kernel_config(ps.CreateKernelConfig()) def vadd(): a[0] @= b[0] + c[0] - kernel_vadd = ps.create_kernel(**vadd) + kernels.append(ps.create_kernel(**vadd)) + + @ps.kernel_config(ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': 'best'})) + def vadd_vector(): + a[0] @= b[0] + c[0] + kernels.append(ps.create_kernel(**vadd_vector)) @ps.kernel_config(ps.CreateKernelConfig()) def daxpy(): b[0] @= alpha * a[0] + b[0] - kernel_daxpy = ps.create_kernel(**daxpy) + kernels.append(ps.create_kernel(**daxpy)) + + @ps.kernel_config(ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': 'best'})) + def daxpy_vector(): + b[0] @= alpha * a[0] + b[0] + kernels.append(ps.create_kernel(**daxpy_vector)) - generate_benchmark([kernel_vadd, kernel_daxpy], path) + generate_benchmark(kernels, path, compiler=compiler) def make(path: Path): - subprocess.run(['make']) + subprocess.run(['make'], check=True) -def execute(path: Path): - subprocess.run(['./benchmark-GCC', '200']) +def execute(path: Path, compiler: Compiler): + subprocess.run([f'./benchmark-{compiler.name}', '100'], check=True) def main(): + compiler = Compiler.GCCdebug path = Path.cwd() - generate(path) + generate(path, compiler) make(path) - execute(path) + execute(path, compiler) if __name__ == '__main__': diff --git a/pystencils_benchmark/templates/Clang.mk b/pystencils_benchmark/templates/Clang.mk index 61eee7f897068aea24331bc3a9c9af941a55e12a..be60d3812bde44525848805090dd73aaad707a1d 100644 --- a/pystencils_benchmark/templates/Clang.mk +++ b/pystencils_benchmark/templates/Clang.mk @@ -6,14 +6,14 @@ ANSI_CFLAGS += -std=c99 ANSI_CFLAGS += -pedantic ANSI_CFLAGS += -Wextra -CFLAGS = -O3 -Wno-format -Wall $(ANSI_CFLAGS) -fopenmp +CFLAGS = -O3 -Wno-format -Wall $(ANSI_CFLAGS) -fopenmp -march=native # More warning pls #CFLAGS += -Wfloat-equal -Wundef -Wshadow -Wpointer-arith -Wcast-align -Wstrict-overflow=5 -Wwrite-strings -Waggregate-return # Maybe too much warnings #CFLAGS += -Wcast-qual -Wswitch-default -Wconversion -Wunreachable-code # Specific C flags CFLAGS := $(CFLAGS) -Wstrict-prototypes -LFLAGS = -fopenmp +LFLAGS = -fopenmp=libomp DEFINES = -D_GNU_SOURCE -DNDEBUG INCLUDES = LIBS = diff --git a/pystencils_benchmark/templates/GCC.mk b/pystencils_benchmark/templates/GCC.mk index c633858f2f897b5255dd9995391cab1bdaad7caf..46668e6b9d61f6d936ea315c998bfd36ab548933 100644 --- a/pystencils_benchmark/templates/GCC.mk +++ b/pystencils_benchmark/templates/GCC.mk @@ -6,7 +6,7 @@ ANSI_CFLAGS += -std=c99 ANSI_CFLAGS += -pedantic ANSI_CFLAGS += -Wextra -CFLAGS = -O3 -Wno-format -Wall $(ANSI_CFLAGS) -fopenmp +CFLAGS = -O3 -Wno-format -Wall $(ANSI_CFLAGS) -fopenmp -march=native # More warning pls #CFLAGS += -Wfloat-equal -Wundef -Wshadow -Wpointer-arith -Wcast-align -Wstrict-overflow=5 -Wwrite-strings -Waggregate-return # Maybe too much warnings diff --git a/pystencils_benchmark/templates/GCCdebug.mk b/pystencils_benchmark/templates/GCCdebug.mk index f453f0d9a253f37919e0927f74a8e2b8efe72152..47b79c32d75c3ecc0d015110b7cd11b280ee6918 100644 --- a/pystencils_benchmark/templates/GCCdebug.mk +++ b/pystencils_benchmark/templates/GCCdebug.mk @@ -6,8 +6,7 @@ ANSI_CFLAGS += -std=c99 ANSI_CFLAGS += -pedantic ANSI_CFLAGS += -Wextra -CFLAGS = -O0 -g -Wno-format -Wall $(ANSI_CFLAGS) -FCFLAGS = +CFLAGS = -O0 -g -Wno-format -Wall $(ANSI_CFLAGS) -march=native LFLAGS = DEFINES = -D_GNU_SOURCE INCLUDES = diff --git a/pystencils_benchmark/templates/Makefile b/pystencils_benchmark/templates/Makefile index b9b8cfc6878061d69b55267026bce72adaa21316..98fcaaa19d9a753fd346da9480fd4935d112f2e5 100644 --- a/pystencils_benchmark/templates/Makefile +++ b/pystencils_benchmark/templates/Makefile @@ -30,7 +30,7 @@ $(BUILD_DIR)/%.o: %.c $(BUILD_DIR)/%.s: %.c @echo "===> GENERATE ASM $@" - $(Q)$(CC) -S $(CPPFLAGS) $(CFLAGS) $< -o $@ + $(Q)$(CC) -S $(CFLAGS) $< -o $@ tags: @echo "===> GENERATE TAGS" diff --git a/pystencils_benchmark/templates/aligned_malloc.h b/pystencils_benchmark/templates/aligned_malloc.h index 535315713d95a1781087cd07c2e879d3ce9590f7..52693f9b7fffd2a3108574088d9112a1028f7fe5 100644 --- a/pystencils_benchmark/templates/aligned_malloc.h +++ b/pystencils_benchmark/templates/aligned_malloc.h @@ -14,6 +14,6 @@ inline void* aligned_malloc(size_t size, size_t align) { if(posix_memalign(&result, align, size)) result = 0; #endif return result; -}; +} #endif diff --git a/pystencils_benchmark/templates/main.c b/pystencils_benchmark/templates/main.c index 7f8ea9992b7abd4196e1d044128a3b693acb0bb1..1eea7c1e92d232f68271dad5c14fb769c0985220 100644 --- a/pystencils_benchmark/templates/main.c +++ b/pystencils_benchmark/templates/main.c @@ -6,6 +6,7 @@ #include <assert.h> #include "timing.h" +#include "aligned_malloc.h" {%- for include in includes %} {{ include }} @@ -26,7 +27,7 @@ int main(int argc, char **argv) {%- for field_name, dataType, elements, size, offset, alignment in kernel.fields %} // Initialization {{field_name}} {%- if alignment > 0 %} - {{dataType}} * {{field_name}} = ({{dataType}} *) aligned_malloc_with_offset({{size}}, {{alignment}}, {{offset}}); + {{dataType}} * {{field_name}} = ({{dataType}} *) aligned_malloc({{size}}, {{alignment}});//, {{offset}}); {%- else %} {{dataType}} * {{field_name}} = ({{dataType}} *) malloc({{size}}); {%- endif %}