From cfcfb9929af9d7a13993c1be2a6a1f93e756d4cf Mon Sep 17 00:00:00 2001
From: Rafael Ravedutti <rafaelravedutti@gmail.com>
Date: Wed, 22 Feb 2023 02:07:56 +0100
Subject: [PATCH] Use host atomics when not within GPU context

Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
---
 runtime/devices/device.hpp    | 21 +++++++++++++++++++--
 runtime/devices/dummy.cpp     | 14 --------------
 src/pairs/analysis/modules.py |  4 ++++
 src/pairs/code_gen/cgen.py    |  5 +++--
 src/pairs/ir/atomic.py        |  1 +
 5 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/runtime/devices/device.hpp b/runtime/devices/device.hpp
index 07d671b..3ef88ef 100644
--- a/runtime/devices/device.hpp
+++ b/runtime/devices/device.hpp
@@ -19,6 +19,21 @@ __host__ void copy_to_host(const void *d_ptr, void *h_ptr, size_t count);
 __host__ void copy_static_symbol_to_device(void *h_ptr, const void *d_ptr, size_t count);
 __host__ void copy_static_symbol_to_host(void *d_ptr, const void *h_ptr, size_t count);
 
+inline __host__ int host_atomic_add(int *addr, int val) {
+    *addr += val;
+    return *addr - val;
+}
+
+inline __host__ int host_atomic_add_resize_check(int *addr, int val, int *resize, int capacity) {
+    const int add_res = *addr + val;
+    if(add_res >= capacity) {
+        *resize = add_res;
+        return *addr;
+    }
+
+    return host_atomic_add(addr, val);
+}
+
 #ifdef PAIRS_TARGET_CUDA
 __device__ int atomic_add(int *addr, int val) { return atomicAdd(addr, val); }
 __device__ int atomic_add_resize_check(int *addr, int val, int *resize, int capacity) {
@@ -31,8 +46,10 @@ __device__ int atomic_add_resize_check(int *addr, int val, int *resize, int capa
     return atomic_add(addr, val);
 }
 #else
-int atomic_add(int *addr, int val);
-int atomic_add_resize_check(int *addr, int val, int *resize, int capacity);
+inline int atomic_add(int *addr, int val) { return host_atomic_add(addr, val); }
+inline int atomic_add_resize_check(int *addr, int val, int *resize, int capacity) {
+    return host_atomic_add_resize_check(addr, val, resize, capacity);
+}
 #endif
 
 }
diff --git a/runtime/devices/dummy.cpp b/runtime/devices/dummy.cpp
index c93712d..de6a8df 100644
--- a/runtime/devices/dummy.cpp
+++ b/runtime/devices/dummy.cpp
@@ -9,19 +9,5 @@ void copy_to_device(void const *h_ptr, void *d_ptr, size_t count) {}
 void copy_to_host(void const *d_ptr, void *h_ptr, size_t count) {}
 void copy_static_symbol_to_device(void *h_ptr, const void *d_ptr, size_t count) {}
 void copy_static_symbol_to_host(void *d_ptr, const void *h_ptr, size_t count) {}
-int atomic_add(int *addr, int val) {
-    *addr += val;
-    return *addr - val;
-}
-
-int atomic_add_resize_check(int *addr, int val, int *resize, int capacity) {
-    const int add_res = *addr + val;
-    if(add_res >= capacity) {
-        *resize = add_res;
-        return *addr;
-    }
-
-    return atomic_add(addr, val);
-}
 
 }
diff --git a/src/pairs/analysis/modules.py b/src/pairs/analysis/modules.py
index 8bf4930..c75ded4 100644
--- a/src/pairs/analysis/modules.py
+++ b/src/pairs/analysis/modules.py
@@ -29,6 +29,10 @@ class FetchModulesReferences(Visitor):
         self.writing = False
         self.visit(ast_node.value)
 
+        for m in self.module_stack:
+            if m.run_on_device:
+                ast_node.device_flag = True
+
         if ast_node.resize is not None:
             self.visit(ast_node.resize)
             self.visit(ast_node.capacity)
diff --git a/src/pairs/code_gen/cgen.py b/src/pairs/code_gen/cgen.py
index 270a8b1..9e40f1d 100644
--- a/src/pairs/code_gen/cgen.py
+++ b/src/pairs/code_gen/cgen.py
@@ -255,13 +255,14 @@ class CGen:
                 value = self.generate_expression(atomic_add.value)
                 tkw = Types.c_keyword(atomic_add.type())
                 acc_ref = f"atm_add{atomic_add.id()}"
+                prefix = "" if ast_node.elem.device_flag else "host_"
 
                 if atomic_add.check_for_resize():
                     resize = self.generate_expression(atomic_add.resize)
                     capacity = self.generate_expression(atomic_add.capacity)
-                    self.print(f"const {tkw} {acc_ref} = pairs::atomic_add_resize_check(&({elem}), {value}, &({resize}), {capacity});")
+                    self.print(f"const {tkw} {acc_ref} = pairs::{prefix}atomic_add_resize_check(&({elem}), {value}, &({resize}), {capacity});")
                 else:
-                    self.print(f"const {tkw} {acc_ref} = pairs::atomic_add(&({elem}), {value});")
+                    self.print(f"const {tkw} {acc_ref} = pairs::{prefix}atomic_add(&({elem}), {value});")
 
         if isinstance(ast_node, Branch):
             cond = self.generate_expression(ast_node.cond)
diff --git a/src/pairs/ir/atomic.py b/src/pairs/ir/atomic.py
index c492b35..9de1c9e 100644
--- a/src/pairs/ir/atomic.py
+++ b/src/pairs/ir/atomic.py
@@ -16,6 +16,7 @@ class AtomicAdd(ASTTerm):
         self.value = Lit.cvt(sim, value)
         self.resize = None
         self.capacity = None
+        self.device_flag = False
 
     def __str__(self):
         return f"AtomicAdd<{self.elem, self.val}>"
-- 
GitLab