From 70f7e0edb31bc2c7f6ad0b379fd3365e8cca5426 Mon Sep 17 00:00:00 2001
From: Behzad Safaei <iwia103h@a0128.nhr.fau.de>
Date: Sun, 8 Dec 2024 20:26:28 +0100
Subject: [PATCH] Resolve GPU segfault

---
 examples/dem_sd.py                   | 8 ++++----
 runtime/devices/cuda.cu              | 2 +-
 runtime/pairs.hpp                    | 6 ++++--
 src/pairs/ir/kernel.py               | 2 +-
 src/pairs/sim/comm.py                | 2 +-
 src/pairs/sim/simulation.py          | 6 +++---
 src/pairs/transformations/modules.py | 2 +-
 7 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/examples/dem_sd.py b/examples/dem_sd.py
index 96e1c05..c0e0212 100644
--- a/examples/dem_sd.py
+++ b/examples/dem_sd.py
@@ -137,9 +137,9 @@ psim.set_domain([0.0, 0.0, 0.0, domainSize_SI[0], domainSize_SI[1], domainSize_S
 psim.set_domain_partitioner(pairs.block_forest())
 # psim.set_domain_partitioner(pairs.regular_domain_partitioner())
 psim.pbc([False, False, False])
-# psim.dem_sc_grid(
-#     domainSize_SI[0], domainSize_SI[1], domainSize_SI[2], generationSpacing_SI,
-#     diameter_SI, minDiameter_SI, maxDiameter_SI, initialVelocity_SI, densityParticle_SI, ntypes)
+psim.dem_sc_grid(
+    domainSize_SI[0], domainSize_SI[1], domainSize_SI[2], generationSpacing_SI,
+    diameter_SI, minDiameter_SI, maxDiameter_SI, initialVelocity_SI, densityParticle_SI, ntypes)
 
 #psim.read_particle_data(
 #    "data/spheres.input",
@@ -165,7 +165,7 @@ psim.setup(update_mass_and_inertia, {'densityParticle_SI': densityParticle_SI,
 
 #psim.compute_half()
 psim.build_cell_lists(linkedCellWidth)
-psim.vtk_output(f"output/dem_{target}", frequency=visSpacing)
+# psim.vtk_output(f"output/dem_{target}", frequency=visSpacing)
 
 psim.compute(gravity,
              symbols={'densityParticle_SI': densityParticle_SI,
diff --git a/runtime/devices/cuda.cu b/runtime/devices/cuda.cu
index 52e8825..4caad6d 100644
--- a/runtime/devices/cuda.cu
+++ b/runtime/devices/cuda.cu
@@ -97,7 +97,7 @@ __device__ real_t atomic_add(real_t *addr, real_t val) { return atomicAdd_double
 __device__ int atomic_add_resize_check(int *addr, int val, int *resize, int capacity) {
     const int add_res = *addr + val;
     
-    printf("atomic_add_resize_check::: add_res %d --- val %d --- capacity %d --- resize %d\n", add_res, val, capacity, *resize);
+    // printf("atomic_add_resize_check::: add_res %d --- val %d --- capacity %d --- resize %d\n", add_res, val, capacity, *resize);
     
     if(add_res >= capacity) {
         *resize = add_res;
diff --git a/runtime/pairs.hpp b/runtime/pairs.hpp
index 56d4022..27892e4 100644
--- a/runtime/pairs.hpp
+++ b/runtime/pairs.hpp
@@ -60,8 +60,10 @@ public:
 
     // Variables
     template<typename T>
-    RuntimeVar<T> addDeviceVariable(T *h_ptr) {
-       return RuntimeVar<T>(h_ptr); 
+    RuntimeVar<T> &addDeviceVariable(T *h_ptr) {
+        // TODO: Proper memory management for RuntimeVar variables
+        RuntimeVar<T> *ret = new RuntimeVar<T>(h_ptr);
+        return *ret; 
     }
 
     void trackVariable(std::string variable_name, void *ptr) {
diff --git a/src/pairs/ir/kernel.py b/src/pairs/ir/kernel.py
index d40be9e..04def29 100644
--- a/src/pairs/ir/kernel.py
+++ b/src/pairs/ir/kernel.py
@@ -190,7 +190,7 @@ class KernelLaunch(ASTNode):
         self._iterator = iterator
         self._range_min = range_min
         self._range_max = range_max
-        self._threads_per_block = Lit.cvt(sim, 1)
+        self._threads_per_block = Lit.cvt(sim, 32)
         self._nelems = (range_max - range_min) 
         self._nblocks = (self._nelems + self._threads_per_block - 1) / self._threads_per_block
 
diff --git a/src/pairs/sim/comm.py b/src/pairs/sim/comm.py
index 6ca1fc8..540df8c 100644
--- a/src/pairs/sim/comm.py
+++ b/src/pairs/sim/comm.py
@@ -291,7 +291,7 @@ class DetermineGhostParticles(Lowerable):
         self.sim.check_resize(self.comm.send_capacity, nsend)
         #self.sim.check_resize(self.comm.send_capacity, nsend_all)
 
-        PrintCode(self.sim, f"std::cout << \"resizes[0] {self.sim._module_name} ========== \" << pobj->resizes[0] << std::endl;")
+        # PrintCode(self.sim, f"std::cout << \"resizes[0] {self.sim._module_name} ========== \" << pobj->resizes[0] << std::endl;")
         if is_exchange:
             for i in ParticleFor(self.sim):
                 Assign(self.sim, exchg_flag[i], 0)
diff --git a/src/pairs/sim/simulation.py b/src/pairs/sim/simulation.py
index acab2a5..870231b 100644
--- a/src/pairs/sim/simulation.py
+++ b/src/pairs/sim/simulation.py
@@ -482,12 +482,12 @@ class Simulation:
 
             timestep_procedures.append(ResetContactHistoryUsageStatus(self, self._contact_history))
 
-        # Reset volatile properties and add computational kernels
-        timestep_procedures += [ResetVolatileProperties(self)]
-
         # add computational kernels
         timestep_procedures += self.functions
 
+        # Reset volatile properties
+        timestep_procedures += [ResetVolatileProperties(self)]
+
         # For whole-program-generation, add reverse_comm wherever needed in the timestep loop (eg: after computational kernels) like this:
         if self._generate_whole_program:
             timestep_procedures += [reverse_comm_module]
diff --git a/src/pairs/transformations/modules.py b/src/pairs/transformations/modules.py
index 8fed95e..4b53830 100644
--- a/src/pairs/transformations/modules.py
+++ b/src/pairs/transformations/modules.py
@@ -195,7 +195,7 @@ class ReplaceModulesByCalls(Mutator):
 
                 resize_stmts.append(
                     Filter(sim, sim.resizes[resize_id] > 0, Block(sim,
-                        [Print(sim, f"resizes[{resize_id}] -> {capacity.name()}")] +
+                        # [Print(sim, f"resizes[{resize_id}] = " , sim.resizes[resize_id], f" {capacity.name()} = ", capacity)] +
                         [Assign(sim, capacity, self.grow_fn(sim.resizes[resize_id]))] +
                         [a.realloc() for a in capacity.bonded_arrays()] +
                         props_realloc)))
-- 
GitLab