From 3a8e49af8fb516289a585e6f57345d4c83c1bef8 Mon Sep 17 00:00:00 2001
From: Rafael Ravedutti <rafaelravedutti@gmail.com>
Date: Thu, 21 Dec 2023 03:19:14 +0100
Subject: [PATCH] Generate OpenMP code (still not working)

Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
---
 Makefile                     |  2 +-
 runtime/devices/device.hpp   | 24 ++++++++++++++++++++++++
 src/pairs/__init__.py        |  5 ++++-
 src/pairs/code_gen/cgen.py   |  8 ++++++++
 src/pairs/code_gen/target.py |  4 ++++
 5 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 1a0887c..c823f1e 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ PYCMD=python3
 CC=mpicc
 #CC=mpiicpx
 #CC=mpiicpc
-CFLAGS=-Ofast -march=core-avx2 ${MPI_FLAGS} ${LIKWID_FLAGS}
+CFLAGS=-Ofast -march=core-avx2 -fopenmp ${MPI_FLAGS} ${LIKWID_FLAGS}
 #CFLAGS=-Ofast -xHost -qopt-zmm-usage=high ${MPI_FLAGS} ${LIKWID_FLAGS}
 #CFLAGS=-Ofast -xCORE-AVX512 -qopt-zmm-usage=high ${MPI_FLAGS} ${LIKWID_FLAGS}
 DEBUG_FLAGS=
diff --git a/runtime/devices/device.hpp b/runtime/devices/device.hpp
index ade0405..107b70e 100644
--- a/runtime/devices/device.hpp
+++ b/runtime/devices/device.hpp
@@ -25,6 +25,29 @@ __host__ void copy_slice_to_host(const void *d_ptr, void *h_ptr, size_t offset,
 __host__ void copy_static_symbol_to_device(void *h_ptr, const void *d_ptr, size_t count);
 __host__ void copy_static_symbol_to_host(void *d_ptr, const void *h_ptr, size_t count);
 
+#ifdef PAIRS_TARGET_OPENMP
+#include <omp.h>
+
+// Atomically add val to *addr; returns the value *addr held before the add.
+// Uses "atomic capture" rather than an unnamed "critical": all unnamed critical
+// regions share one program-wide lock and would serialize unrelated updates.
+inline __host__ int host_atomic_add(int *addr, int val) {
+    int old_value;
+    #pragma omp atomic capture
+    { old_value = *addr; *addr += val; }
+    return old_value;
+}
+
+// Atomically add val to *addr; returns the value *addr held before the add.
+// The old value is captured directly: recomputing it as (new - val) is inexact
+// if real_t is floating point. "atomic capture" also avoids a global critical lock.
+inline __host__ real_t host_atomic_add(real_t *addr, real_t val) {
+    real_t old_value;
+    #pragma omp atomic capture
+    { old_value = *addr; *addr += val; }
+    return old_value;
+}
+#else
 inline __host__ int host_atomic_add(int *addr, int val) {
     *addr += val;
     return *addr - val;
@@ -35,6 +58,7 @@ inline __host__ real_t host_atomic_add(real_t *addr, real_t val) {
     *addr += val;
     return tmp;
 }
+#endif
 
 inline __host__ int host_atomic_add_resize_check(int *addr, int val, int *resize, int capacity) {
     const int add_res = *addr + val;
diff --git a/src/pairs/__init__.py b/src/pairs/__init__.py
index ab81066..e89e0a7 100644
--- a/src/pairs/__init__.py
+++ b/src/pairs/__init__.py
@@ -21,7 +21,10 @@ def simulation(
         CGen(ref, debug), shapes, dims, timesteps, double_prec, use_contact_history,
         particle_capacity, neighbor_capacity)
 
-def target_cpu():
+def target_cpu(parallel=False):
+    """Return a C++ CPU Target; with parallel=True the OpenMP feature is added."""
+    if parallel:  # NOTE(review): features is a list here, a single value below -- confirm Target accepts both
+        return Target(Target.Backend_CPP, [Target.Feature_CPU, Target.Feature_OpenMP])
     return Target(Target.Backend_CPP, Target.Feature_CPU)
 
 def target_gpu():
diff --git a/src/pairs/code_gen/cgen.py b/src/pairs/code_gen/cgen.py
index adf3fb9..0d6d8b0 100644
--- a/src/pairs/code_gen/cgen.py
+++ b/src/pairs/code_gen/cgen.py
@@ -62,6 +62,10 @@ class CGen:
         if self.target.is_gpu():
             self.print("#define PAIRS_TARGET_CUDA")
 
+        if self.target.is_openmp():
+            self.print("#define PAIRS_TARGET_OPENMP")
+            self.print("#include <omp.h>")
+
         self.print("#include <limits.h>")
         self.print("#include <math.h>")
         self.print("#include <stdbool.h>")
@@ -507,6 +511,10 @@ class CGen:
             iterator = self.generate_expression(ast_node.iterator)
             lower_range = self.generate_expression(ast_node.min)
             upper_range = self.generate_expression(ast_node.max)
+
+            if self.target.is_openmp() and ast_node.is_kernel_candidate():
+                self.print("#pragma omp parallel for")
+
             self.print(f"for(int {iterator} = {lower_range}; {iterator} < {upper_range}; {iterator}++) {{")
             self.generate_statement(ast_node.block)
             self.print("}")
diff --git a/src/pairs/code_gen/target.py b/src/pairs/code_gen/target.py
index 30acef5..ef569ea 100644
--- a/src/pairs/code_gen/target.py
+++ b/src/pairs/code_gen/target.py
@@ -14,6 +14,7 @@ class Target:
     Feature_AVX2 = 3
     Feature_AVX512 = 4
     Feature_GPU = 5
+    Feature_OpenMP = 6
 
     # Operating system
     OS_Unknown = 0
@@ -37,3 +38,6 @@ class Target:
 
     def is_gpu(self):
         return self.has_feature(Target.Feature_GPU)
+
+    def is_openmp(self):  # whether has_feature() reports Feature_OpenMP for this target
+        return self.has_feature(Target.Feature_OpenMP)
-- 
GitLab