From 3a8e49af8fb516289a585e6f57345d4c83c1bef8 Mon Sep 17 00:00:00 2001 From: Rafael Ravedutti <rafaelravedutti@gmail.com> Date: Thu, 21 Dec 2023 03:19:14 +0100 Subject: [PATCH] Generate OpenMP code (still not working) Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com> --- Makefile | 2 +- runtime/devices/device.hpp | 24 ++++++++++++++++++++++++ src/pairs/__init__.py | 5 ++++- src/pairs/code_gen/cgen.py | 8 ++++++++ src/pairs/code_gen/target.py | 4 ++++ 5 files changed, 41 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 1a0887c..c823f1e 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ PYCMD=python3 CC=mpicc #CC=mpiicpx #CC=mpiicpc -CFLAGS=-Ofast -march=core-avx2 ${MPI_FLAGS} ${LIKWID_FLAGS} +CFLAGS=-Ofast -march=core-avx2 -fopenmp ${MPI_FLAGS} ${LIKWID_FLAGS} #CFLAGS=-Ofast -xHost -qopt-zmm-usage=high ${MPI_FLAGS} ${LIKWID_FLAGS} #CFLAGS=-Ofast -xCORE-AVX512 -qopt-zmm-usage=high ${MPI_FLAGS} ${LIKWID_FLAGS} DEBUG_FLAGS= diff --git a/runtime/devices/device.hpp b/runtime/devices/device.hpp index ade0405..107b70e 100644 --- a/runtime/devices/device.hpp +++ b/runtime/devices/device.hpp @@ -25,6 +25,29 @@ __host__ void copy_slice_to_host(const void *d_ptr, void *h_ptr, size_t offset, __host__ void copy_static_symbol_to_device(void *h_ptr, const void *d_ptr, size_t count); __host__ void copy_static_symbol_to_host(void *d_ptr, const void *h_ptr, size_t count); +#ifdef PAIRS_TARGET_OPENMP +#include <omp.h> + +inline __host__ int host_atomic_add(int *addr, int val) { + int result; + #pragma omp critical + { + *addr += val; + result = *addr; + } + return result - val; +} + +inline __host__ real_t host_atomic_add(real_t *addr, real_t val) { + real_t result; + #pragma omp critical + { + *addr += val; + result = *addr; + } + return result - val; +} +#else inline __host__ int host_atomic_add(int *addr, int val) { *addr += val; return *addr - val; @@ -35,6 +58,7 @@ inline __host__ real_t host_atomic_add(real_t *addr, real_t val) { *addr += val; return tmp; } +#endif inline __host__ int host_atomic_add_resize_check(int *addr, int val, int *resize, int capacity) { const int add_res = *addr + val; diff --git a/src/pairs/__init__.py b/src/pairs/__init__.py index ab81066..e89e0a7 100644 --- a/src/pairs/__init__.py +++ b/src/pairs/__init__.py @@ -21,7 +21,10 @@ def simulation( CGen(ref, debug), shapes, dims, timesteps, double_prec, use_contact_history, particle_capacity, neighbor_capacity) -def target_cpu(): +def target_cpu(parallel=False): + if parallel: + return Target(Target.Backend_CPP, [Target.Feature_CPU, Target.Feature_OpenMP]) + return Target(Target.Backend_CPP, Target.Feature_CPU) def target_gpu(): diff --git a/src/pairs/code_gen/cgen.py b/src/pairs/code_gen/cgen.py index adf3fb9..0d6d8b0 100644 --- a/src/pairs/code_gen/cgen.py +++ b/src/pairs/code_gen/cgen.py @@ -62,6 +62,10 @@ class CGen: if self.target.is_gpu(): self.print("#define PAIRS_TARGET_CUDA") + if self.target.is_openmp(): + self.print("#define PAIRS_TARGET_OPENMP") + self.print("#include <omp.h>") + self.print("#include <limits.h>") self.print("#include <math.h>") self.print("#include <stdbool.h>") @@ -507,6 +511,10 @@ class CGen: iterator = self.generate_expression(ast_node.iterator) lower_range = self.generate_expression(ast_node.min) upper_range = self.generate_expression(ast_node.max) + + if self.target.is_openmp() and ast_node.is_kernel_candidate(): + self.print("#pragma omp parallel for") + self.print(f"for(int {iterator} = {lower_range}; {iterator} < {upper_range}; {iterator}++) {{") self.generate_statement(ast_node.block) self.print("}") diff --git a/src/pairs/code_gen/target.py b/src/pairs/code_gen/target.py index 30acef5..ef569ea 100644 --- a/src/pairs/code_gen/target.py +++ b/src/pairs/code_gen/target.py @@ -14,6 +14,7 @@ class Target: Feature_AVX2 = 3 Feature_AVX512 = 4 Feature_GPU = 5 + Feature_OpenMP = 6 # Operating system OS_Unknown = 0 @@ -37,3 +38,6 @@ class Target: def is_gpu(self): return self.has_feature(Target.Feature_GPU) + + def is_openmp(self): + return self.has_feature(Target.Feature_OpenMP) -- GitLab