diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8b8226914ebd09be148e22b13ae30acdf4d5ccad..e5e64f58adc9305ca50a519910012e440bf6a805 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -108,6 +108,12 @@ if(COMPILE_CUDA)
     set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH})
     set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -rdc=true")
 
+    if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -g -G -O0 -DDEBUG")
+    else()
+        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -O3")
+    endif()
+
     if(USE_WALBERLA)
         waLBerla_add_executable(
             NAME ${TARGET_BIN}
diff --git a/examples/dem_sd.py b/examples/dem_sd.py
index 3a8bdfa527935e4eff5f74bd6f7c9b44f3903e83..5bea2fd80c594e2da27cf99393804b9d5cd40c86 100644
--- a/examples/dem_sd.py
+++ b/examples/dem_sd.py
@@ -78,7 +78,7 @@ frictionCoefficient = 0.5
 restitutionCoefficient = 0.1
 collisionTime_SI = 5e-4
 poissonsRatio = 0.22
-timeSteps = 10000
+timeSteps = 1
 visSpacing = 200
 denseBottomLayer = False
 bottomLayerOffsetFactor = 1.0
@@ -101,7 +101,7 @@ psim = pairs.simulation(
     use_contact_history=False,
     particle_capacity=1000000,
     neighbor_capacity=20,
-    debug=True, generate_whole_program=False)
+    debug=True, generate_whole_program=True)
 
 if target == 'gpu':
     psim.target(pairs.target_gpu())
@@ -115,10 +115,10 @@ psim.add_property('linear_velocity', pairs.vector())
 psim.add_property('angular_velocity', pairs.vector())
 psim.add_property('force', pairs.vector(), volatile=True)
 psim.add_property('torque', pairs.vector(), volatile=True)
-psim.add_property('hydrodynamic_force', pairs.vector())
-psim.add_property('hydrodynamic_torque', pairs.vector())
-psim.add_property('old_hydrodynamic_force', pairs.vector())
-psim.add_property('old_hydrodynamic_torque', pairs.vector())
+# psim.add_property('hydrodynamic_force', pairs.vector())
+# psim.add_property('hydrodynamic_torque', pairs.vector())
+# psim.add_property('old_hydrodynamic_force', pairs.vector())
+# psim.add_property('old_hydrodynamic_torque', pairs.vector())
 psim.add_property('radius', pairs.real(), 1.0)
 psim.add_property('normal', pairs.vector())
 psim.add_property('inv_inertia', pairs.matrix())
@@ -134,7 +134,7 @@ psim.set_domain([0.0, 0.0, 0.0, domainSize_SI[0], domainSize_SI[1], domainSize_S
 psim.set_domain_partitioner(pairs.regular_domain_partitioner_xy())
 psim.pbc([False, False, False])
 psim.dem_sc_grid(
-    domainSize_SI[0], domainSize_SI[1], domainSize_SI[2]/2, generationSpacing_SI,
+    domainSize_SI[0], domainSize_SI[1], domainSize_SI[2], generationSpacing_SI,
     diameter_SI, minDiameter_SI, maxDiameter_SI, initialVelocity_SI, densityParticle_SI, ntypes)
 
 #psim.read_particle_data(
diff --git a/runtime/array.hpp b/runtime/array.hpp
index 03a538543ee1f465104b101ea7bb353d527c4f78..d422842f96cbdb6343c11ca370d76a3f94be29c3 100644
--- a/runtime/array.hpp
+++ b/runtime/array.hpp
@@ -24,14 +24,14 @@ public:
         PAIRS_ASSERT(size_ > 0);
     }
 
-    array_t getId() { return id; }
-    std::string getName() { return name; }
-    void *getHostPointer() { return h_ptr; }
-    void *getDevicePointer() { return d_ptr; }
+    array_t getId() const { return id; }
+    std::string getName() const { return name; }
+    void *getHostPointer() const { return h_ptr; }
+    void *getDevicePointer() const { return d_ptr; }
     void setPointers(void *h_ptr_, void *d_ptr_) { h_ptr = h_ptr_, d_ptr = d_ptr_; }
-    void setSize(size_t size_) { size = size_; }
-    size_t getSize() { return size; };
-    bool isStatic() { return is_static; }
+    void setSize(size_t size_) { size = size_;}
+    size_t getSize() const { return size; }
+    bool isStatic() const { return is_static; }
 };
 
 }
diff --git a/runtime/contact_property.hpp b/runtime/contact_property.hpp
index ff273434223e15d9c8dd17b23600be7dfe351ffa..be31b51a3dd22fdd5381da8f345d8608bd1666e4 100644
--- a/runtime/contact_property.hpp
+++ b/runtime/contact_property.hpp
@@ -26,16 +26,16 @@ public:
         PAIRS_ASSERT(type != Prop_Invalid && layout_ != Invalid && sx_ > 0 && sy_ > 0);
     }
 
-    property_t getId() { return id; }
-    std::string getName() { return name; }
-    void *getHostPointer() { return h_ptr; }
-    void *getDevicePointer() { return d_ptr; }
+    property_t getId() const { return id; }
+    std::string getName() const { return name; }
+    void *getHostPointer() const { return h_ptr; }
+    void *getDevicePointer() const { return d_ptr; }
     void setPointers(void *h_ptr_, void *d_ptr_) { h_ptr = h_ptr_, d_ptr = d_ptr_; }
     void setSizes(size_t sx_, size_t sy_) { sx = sx_, sy = sy_; }
-    size_t getTotalSize() { return sx * sy * getPrimitiveTypeSize(); };
-    PropertyType getType() { return type; }
-    layout_t getLayout() { return layout; }
-    size_t getPrimitiveTypeSize() {
+    size_t getTotalSize() const { return sx * sy * getPrimitiveTypeSize(); };
+    PropertyType getType() const { return type; }
+    layout_t getLayout() const { return layout; }
+    size_t getPrimitiveTypeSize() const {
         return  (type == Prop_Integer) ? sizeof(int) :
                 (type == Prop_UInt64) ? sizeof(unsigned long long int) :
                 (type == Prop_Real) ? sizeof(real_t) :
diff --git a/runtime/devices/cuda.cu b/runtime/devices/cuda.cu
index 38848b216477104a398aa59bc4d3e10509526e66..7ee69c1ad97e64ffac270ee0bce0802bd4187e69 100644
--- a/runtime/devices/cuda.cu
+++ b/runtime/devices/cuda.cu
@@ -96,6 +96,9 @@ __device__ int atomic_add(int *addr, int val) { return atomicAdd(addr, val); }
 __device__ real_t atomic_add(real_t *addr, real_t val) { return atomicAdd_double(addr, val); }
 __device__ int atomic_add_resize_check(int *addr, int val, int *resize, int capacity) {
     const int add_res = *addr + val;
+    
+    printf("atomic_add_resize_check::: add_res %d --- val %d --- capacity %d --- resize %d\n", add_res, val, capacity, *resize);
+    
     if(add_res >= capacity) {
         *resize = add_res;
         return *addr;
diff --git a/runtime/feature_property.hpp b/runtime/feature_property.hpp
index 025a514766a01643c21915e362a24481664af5fd..e3221b4cefcfdc20f7d70d612c2d7be3f90c89ae 100644
--- a/runtime/feature_property.hpp
+++ b/runtime/feature_property.hpp
@@ -22,13 +22,13 @@ public:
         nkinds(nkinds_),
         array_size(array_size_) {}
 
-    property_t getId() { return id; }
-    std::string getName() { return name; }
-    void *getHostPointer() { return h_ptr; }
-    void *getDevicePointer() { return d_ptr; }
-    PropertyType getType() { return type; }
-    size_t getNumberOfKinds() { return nkinds; }
-    size_t getArraySize() { return array_size; }
+    property_t getId() const { return id; }
+    std::string getName() const { return name; }
+    void *getHostPointer() const { return h_ptr; }
+    void *getDevicePointer() const { return d_ptr; }
+    PropertyType getType() const { return type; }
+    size_t getNumberOfKinds() const { return nkinds; }
+    size_t getArraySize() const { return array_size; }
 };
 
 }
diff --git a/runtime/property.hpp b/runtime/property.hpp
index fc082802a40e3ef6b054e8a0b13c82bebe150d02..301a46b23a8aca0322e420db6b64cd16ee7d39d4 100644
--- a/runtime/property.hpp
+++ b/runtime/property.hpp
@@ -31,17 +31,17 @@ public:
         PAIRS_ASSERT(type != Prop_Invalid && layout_ != Invalid && sx_ > 0 && sy_ > 0);
     }
 
-    property_t getId() { return id; }
-    std::string getName() { return name; }
-    void *getHostPointer() { return h_ptr; }
-    void *getDevicePointer() { return d_ptr; }
+    property_t getId() const { return id; }
+    std::string getName() const { return name; }
+    void *getHostPointer() const { return h_ptr; }
+    void *getDevicePointer() const { return d_ptr; }
     void setPointers(void *h_ptr_, void *d_ptr_) { h_ptr = h_ptr_, d_ptr = d_ptr_; }
     void setSizes(size_t sx_, size_t sy_) { sx = sx_, sy = sy_; }
-    size_t getTotalSize() { return sx * sy * getPrimitiveTypeSize(); };
-    PropertyType getType() { return type; }
-    layout_t getLayout() { return layout; }
-    int isVolatile() { return vol != 0; }
-    size_t getPrimitiveTypeSize() {
+    size_t getTotalSize() const { return sx * sy * getPrimitiveTypeSize(); };
+    PropertyType getType() const { return type; }
+    layout_t getLayout() const { return layout; }
+    int isVolatile() const { return vol != 0; }
+    size_t getPrimitiveTypeSize() const {
         return  (type == Prop_Integer) ? sizeof(int) :
                 (type == Prop_UInt64) ? sizeof(unsigned long long int) :
                 (type == Prop_Real) ? sizeof(real_t) :
diff --git a/src/pairs/code_gen/cgen.py b/src/pairs/code_gen/cgen.py
index bfdd48c9134531c61dd2f32989da4ab4b11b0e80..8af9bbadca2c22537df913a3fcee31c771f7d8ba 100644
--- a/src/pairs/code_gen/cgen.py
+++ b/src/pairs/code_gen/cgen.py
@@ -254,21 +254,21 @@ class CGen:
     def generate_pairs_object_structure(self):
 
         self.print("")
-
+        externkw = "" if self.sim._generate_whole_program else "extern "
         if self.target.is_gpu():
             for array in self.sim.arrays.statics():
                 if array.device_flag:
                     t = array.type()
                     tkw = Types.c_keyword(self.sim, t)
                     size = self.generate_expression(ScalarOp.inline(array.alloc_size()))
-                    self.print(f"extern __constant__ {tkw} d_{array.name()}[{size}];")
+                    self.print(f"{externkw}__constant__ {tkw} d_{array.name()}[{size}];")
 
             for feature_prop in self.sim.feature_properties:
                 if feature_prop.device_flag:
                     t = feature_prop.type()
                     tkw = Types.c_keyword(self.sim, t)
                     size = feature_prop.array_size()
-                    self.print(f"extern __constant__ {tkw} d_{feature_prop.name()}[{size}];")
+                    self.print(f"{externkw}__constant__ {tkw} d_{feature_prop.name()}[{size}];")
 
         self.print("")
         self.print("struct PairsObjects {")
@@ -501,7 +501,7 @@ class CGen:
             device_cond = module.run_on_device and self.target.is_gpu()
 
             if self.debug:
-                self.print(f"PAIRS_DEBUG(\"{module.name}\\n\");")
+                self.print(f"PAIRS_DEBUG(\"\\n{module.name}\\n\");")
 
             for var in module.read_only_variables():
                 type_kw = Types.c_keyword(self.sim, var.type())
@@ -519,7 +519,7 @@ class CGen:
                 type_kw = Types.c_keyword(self.sim, array.type())
                 name = array.name() if not device_cond else f"d_{array.name()}"
                 # self.generate_full_object_names = True
-                if not array.is_static() or (array.is_static() and not module.run_on_device):
+                if not array.is_static() or (array.is_static() and not device_cond):
                     self.print(f"{type_kw} *{array.name()} = pobj->{name};")
                     # self.print(f"{type_kw} *{array.name()} = {self.generate_object_reference(array, device=device_cond)};")
                 # self.generate_full_object_names = False
@@ -563,6 +563,7 @@ class CGen:
 
     def generate_kernel(self, kernel):
         kernel_params = "int range_start"
+        has_resizes = False
         for var in kernel.read_only_variables():
             type_kw = Types.c_keyword(self.sim, var.type())
             decl = f"{type_kw} {var.name()}"
@@ -579,6 +580,8 @@ class CGen:
             type_kw = Types.c_keyword(self.sim, array.type())
             decl = f"{type_kw} *{array.name()}"
             kernel_params += f", {decl}"
+            if array.name() == "resizes":
+                has_resizes = True
 
         for prop in kernel.properties():
             type_kw = Types.c_keyword(self.sim, prop.type())
@@ -611,7 +614,15 @@ class CGen:
         self.print(f"    const int {kernel.iterator.name()} = blockIdx.x * blockDim.x + threadIdx.x + range_start;")
         self.print.add_indent(4)
         self.kernel_context = True
+
+        if has_resizes:
+            self.print(f"printf(\"{kernel.name} @@@@@@@@ before kernel: resizes[0] = %d\\n\", resizes[0]);")
+
         self.generate_statement(kernel.block)
+
+        if has_resizes:
+            self.print(f"printf(\"{kernel.name} @@@@@@@@ after kernel: resizes[0] = %d\\n\", resizes[0]);")
+
         self.kernel_context = False
         self.print.add_indent(-4)
         self.print("}")
@@ -652,7 +663,10 @@ class CGen:
             if ast_node.check_for_resize():
                 resize = self.generate_expression(ast_node.resize)
                 capacity = self.generate_expression(ast_node.capacity)
+                self.print(f"printf (\" %d -- before AtomicInc: nsend = %d -- send_capacity = %d -- resizes[0] = %d\\n\", {Printer.line_id}, {elem}, {capacity}, {resize});")
                 self.print(f"pairs::{prefix}atomic_add_resize_check(&({elem}), {value}, &({resize}), {capacity});")
+                self.print(f"printf (\" %d -- after AtomicInc: nsend = %d -- send_capacity = %d -- resizes[0] = %d\\n\", {Printer.line_id}, {elem}, {capacity}, {resize});")
+
             else:
                 self.print(f"pairs::{prefix}atomic_add(&({elem}), {value});")
 
@@ -852,7 +866,9 @@ class CGen:
                 self.print(f"pairs_runtime->copyArrayTo{ctx_suffix}({array_id}, {action}, {size}); // {array_name}")
 
             else:
+                self.print(f"std::cout<< \"{Printer.line_id} -- before {array_name} copyArrayTo{ctx_suffix}({action}) === \" <<  pobj->{array_name}[0]  << \" \" << pobj->{array_name}[1]  << \" \" << pobj->{array_name}[2]  << std::endl;")
                 self.print(f"pairs_runtime->copyArrayTo{ctx_suffix}({array_id}, {action}); // {array_name}")
+                self.print(f"std::cout<< \"{Printer.line_id} -- after {array_name} copyArrayTo{ctx_suffix}({action}) === \" <<  pobj->{array_name}[0]  << \" \" << pobj->{array_name}[1]  << \" \" << pobj->{array_name}[2]  << std::endl;")
 
         if isinstance(ast_node, CopyContactProperty):
             prop_id = ast_node.contact_prop().id()
diff --git a/src/pairs/code_gen/printer.py b/src/pairs/code_gen/printer.py
index 4d73b7679f7f8c52ff1878339dbc0479cb9db215..3eabef6f95c22b6d2f16d168a97437cb1fe00d9f 100644
--- a/src/pairs/code_gen/printer.py
+++ b/src/pairs/code_gen/printer.py
@@ -1,4 +1,6 @@
 class Printer:
+
+    line_id = 0
     def __init__(self, output):
         self.output = output
         self.stream = None
@@ -16,4 +18,5 @@ class Printer:
 
     def __call__(self, text):
         assert self.stream is not None, "Invalid stream!"
+        Printer.line_id += 1
         self.stream.write(self.indent * ' ' + text + '\n')
diff --git a/src/pairs/ir/kernel.py b/src/pairs/ir/kernel.py
index 04def29cf9153a8ec7f5048f79579bd062186dd0..d40be9e355afc5c1b31ea24abf6605825f8695de 100644
--- a/src/pairs/ir/kernel.py
+++ b/src/pairs/ir/kernel.py
@@ -190,7 +190,7 @@ class KernelLaunch(ASTNode):
         self._iterator = iterator
         self._range_min = range_min
         self._range_max = range_max
-        self._threads_per_block = Lit.cvt(sim, 32)
+        self._threads_per_block = Lit.cvt(sim, 1)
         self._nelems = (range_max - range_min) 
         self._nblocks = (self._nelems + self._threads_per_block - 1) / self._threads_per_block
 
diff --git a/src/pairs/sim/comm.py b/src/pairs/sim/comm.py
index dace3c9aeed7be47f1c120dcd05034628c6b1f64..988e71d0bcb2f325b5582c819fd010e221ab50ca 100644
--- a/src/pairs/sim/comm.py
+++ b/src/pairs/sim/comm.py
@@ -9,7 +9,7 @@ from pairs.ir.contexts import Contexts
 from pairs.ir.device import CopyArray
 from pairs.ir.functions import Call_Void
 from pairs.ir.loops import For, ParticleFor, While
-from pairs.ir.print import Print
+from pairs.ir.print import Print, PrintCode
 from pairs.ir.select import Select
 from pairs.ir.sizeof import Sizeof
 from pairs.ir.types import Types
@@ -240,6 +240,7 @@ class DetermineGhostParticles(Lowerable):
         self.sim.check_resize(self.comm.send_capacity, nsend)
         #self.sim.check_resize(self.comm.send_capacity, nsend_all)
 
+        PrintCode(self.sim, f"std::cout << \"resizes[0] {self.sim._module_name} ========== \" << pobj->resizes[0] << std::endl;")
         if is_exchange:
             for i in ParticleFor(self.sim):
                 Assign(self.sim, exchg_flag[i], 0)