diff --git a/Makefile b/Makefile
index 3f07f7168d85a17f57344cb146370db50a146310..18bad17bd043138c6056c4f667cd0c9c6df860de 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 .PHONY: all build clean
 
-all: build lj_cpu lj_gpu
+all: clean build lj_cpu lj_gpu
 	@echo "Everything was done!"
 
 build:
diff --git a/src/pairs/analysis/modules.py b/src/pairs/analysis/modules.py
index 45ffd3dff0d60e4ea64aa6adccc84e9061130b85..5e33c8ecea7cf1f6e6534e31c1bc0fe674c30e08 100644
--- a/src/pairs/analysis/modules.py
+++ b/src/pairs/analysis/modules.py
@@ -52,6 +52,7 @@ class FetchModulesReferences(Visitor):
 
     def visit_Var(self, ast_node):
         for m in self.module_stack:
-            m.add_variable(ast_node, self.writing)
-            if m.run_on_device:
-                ast_node.device_flag = True
+            if not ast_node.temporary():
+                m.add_variable(ast_node, self.writing)
+                if m.run_on_device:
+                    ast_node.device_flag = True
diff --git a/src/pairs/code_gen/cgen.py b/src/pairs/code_gen/cgen.py
index faa0acb5a0db37ad94724069ec5e070244ef6495..648acd3c4aaa17f60ad42b790e88dc8448b09309 100644
--- a/src/pairs/code_gen/cgen.py
+++ b/src/pairs/code_gen/cgen.py
@@ -132,6 +132,11 @@ class CGen:
                 self.print(f"PAIRS_DEBUG(\"{module.name}\\n\");")
                 self.print.add_indent(-4)
 
+            self.print.add_indent(4)
+            for t in module.temporaries:
+                self.generate_statement(VarDecl(module.sim, t))
+            self.print.add_indent(-4)
+
             self.generate_statement(module.block)
             self.print("}")
 
diff --git a/src/pairs/ir/block.py b/src/pairs/ir/block.py
index e0e60194b876cfd3f8b92ec6428e316c52f4cfd7..27c7d5716177fa9c019a167885a15748affcfbfe 100644
--- a/src/pairs/ir/block.py
+++ b/src/pairs/ir/block.py
@@ -22,7 +22,8 @@ def pairs_host_block(func):
             block=Block(sim, sim._block),
             resizes_to_check=sim._resizes_to_check,
             check_properties_resize=sim._check_properties_resize,
-            run_on_device=False)
+            run_on_device=False,
+            temps=sim._module_temps)
 
     return inner
 
@@ -37,7 +38,8 @@ def pairs_device_block(func):
             block=Block(sim, sim._block),
             resizes_to_check=sim._resizes_to_check,
             check_properties_resize=sim._check_properties_resize,
-            run_on_device=True)
+            run_on_device=True,
+            temps=sim._module_temps)
 
     return inner
 
diff --git a/src/pairs/ir/module.py b/src/pairs/ir/module.py
index 34615483a7f1ba348c2f04bb815e36840e78c36d..c960ff1657451b1bcae7a919a90ffdf5410439a4 100644
--- a/src/pairs/ir/module.py
+++ b/src/pairs/ir/module.py
@@ -7,10 +7,11 @@ from pairs.ir.variables import Var
 class Module(ASTNode):
     last_module = 0
 
-    def __init__(self, sim, name=None, block=None, resizes_to_check={}, check_properties_resize=False, run_on_device=False):
+    def __init__(self, sim, name=None, block=None, resizes_to_check={}, check_properties_resize=False, run_on_device=False, temps=[]):
         super().__init__(sim)
         self._id = Module.last_module
         self._name = name if name is not None else "module" + str(Module.last_module)
+        self._temps = temps
         self._variables = {}
         self._arrays = {}
         self._properties = {}
@@ -38,6 +39,10 @@ class Module(ASTNode):
     def run_on_device(self):
         return self._run_on_device
 
+    @property
+    def temporaries(self):
+        return self._temps
+
     def variables(self):
         return self._variables
 
diff --git a/src/pairs/ir/variables.py b/src/pairs/ir/variables.py
index 764c6d25205ef436185ec15fce8531e89fdf9f06..9703ca537b3bfdd97cadde36921e91b095525a39 100644
--- a/src/pairs/ir/variables.py
+++ b/src/pairs/ir/variables.py
@@ -24,7 +24,7 @@ class Variables:
     def add_temp(self, init):
         lit = Lit.cvt(self.sim, init)
         tmp_id = Variables.new_temp_id()
-        tmp_var = Var(self.sim, f"tmp{tmp_id}", lit.type())
+        tmp_var = Var(self.sim, f"tmp{tmp_id}", lit.type(), temp=True)
         self.sim.add_statement(Assign(self.sim, tmp_var, lit))
         return tmp_var
 
@@ -37,11 +37,12 @@ class Variables:
 
 
 class Var(ASTTerm):
-    def __init__(self, sim, var_name, var_type, init_value=0):
+    def __init__(self, sim, var_name, var_type, init_value=0, temp=False):
         super().__init__(sim)
         self.var_name = var_name
         self.var_type = var_type
         self.var_init_value = init_value
+        self.var_temporary = temp
         self.mutable = True
         self.var_bonded_arrays = []
         self.device_flag = False
@@ -61,6 +62,9 @@ class Var(ASTTerm):
     def type(self):
         return self.var_type
 
+    def temporary(self):
+        return self.var_temporary
+
     def set_initial_value(self, value):
         self.var_init_value = value
 
diff --git a/src/pairs/sim/comm.py b/src/pairs/sim/comm.py
index 2239cc399df680a80f1a3b1c657755803beb9c30..527ef6137e9820b5631ea24765f476e2683183cf 100644
--- a/src/pairs/sim/comm.py
+++ b/src/pairs/sim/comm.py
@@ -11,22 +11,24 @@ from pairs.sim.lowerable import Lowerable
 
 
 class Comm:
-    def __init__(self, sim, dom_part, max_neigh_ranks=6, max_buffer_elems=7):
+    def __init__(self, sim, dom_part):
         self.sim = sim
         self.dom_part = dom_part
-        self.nsend_all     = sim.add_var('nsend_all', Types.Int32)
-        self.send_capacity = sim.add_var('send_capacity', Types.Int32, 100)
-        self.recv_capacity = sim.add_var('recv_capacity', Types.Int32, 100)
-        self.nsend         = sim.add_array('nsend', [max_neigh_ranks], Types.Int32)
-        self.send_buffer   = sim.add_array('send_buffer', [self.send_capacity, max_buffer_elems], Types.Double)
-        self.send_map      = sim.add_array('send_map', [self.send_capacity], Types.Int32)
-        self.exchg_flag    = sim.add_array('exchg_flag', [sim.particle_capacity], Types.Int32)
-        self.exchg_copy_to = sim.add_array('exchg_copy_to', [self.send_capacity], Types.Int32)
-        self.send_mult     = sim.add_array('send_mult', [self.send_capacity, sim.ndims()], Types.Int32)
-        self.nrecv         = sim.add_array('nrecv', [max_neigh_ranks], Types.Int32)
-        self.recv_buffer   = sim.add_array('recv_buffer', [self.recv_capacity, max_buffer_elems], Types.Double)
-        self.recv_map      = sim.add_array('recv_map', [self.recv_capacity], Types.Int32)
-        self.recv_mult     = sim.add_array('recv_mult', [self.recv_capacity, sim.ndims()], Types.Int32)
+        self.nsend_all      = sim.add_var('nsend_all', Types.Int32)
+        self.send_capacity  = sim.add_var('send_capacity', Types.Int32, 100)
+        self.recv_capacity  = sim.add_var('recv_capacity', Types.Int32, 100)
+        self.elem_capacity  = sim.add_var('elem_capacity', Types.Int32, 10)
+        self.neigh_capacity = sim.add_var('neigh_capacity', Types.Int32, 6)
+        self.nsend          = sim.add_array('nsend', [self.neigh_capacity], Types.Int32)
+        self.send_buffer    = sim.add_array('send_buffer', [self.send_capacity, self.elem_capacity], Types.Double)
+        self.send_map       = sim.add_array('send_map', [self.send_capacity], Types.Int32)
+        self.exchg_flag     = sim.add_array('exchg_flag', [sim.particle_capacity], Types.Int32)
+        self.exchg_copy_to  = sim.add_array('exchg_copy_to', [self.send_capacity], Types.Int32)
+        self.send_mult      = sim.add_array('send_mult', [self.send_capacity, sim.ndims()], Types.Int32)
+        self.nrecv          = sim.add_array('nrecv', [self.neigh_capacity], Types.Int32)
+        self.recv_buffer    = sim.add_array('recv_buffer', [self.recv_capacity, self.elem_capacity], Types.Double)
+        self.recv_map       = sim.add_array('recv_map', [self.recv_capacity], Types.Int32)
+        self.recv_mult      = sim.add_array('recv_mult', [self.recv_capacity, sim.ndims()], Types.Int32)
 
     @pairs_inline
     def synchronize(self):
@@ -38,7 +40,7 @@ class Comm:
     @pairs_inline
     def borders(self):
         prop_list = [self.sim.property(p) for p in ['mass', 'position']]
-        for d in For(self.sim, 0, self.sim.ndims()):
+        for d in range(self.sim.ndims()):
             DetermineGhostParticles(self, d, self.sim.cell_spacing())
             PackGhostParticles(self, prop_list)
             CommunicateSizes(self)
@@ -48,7 +50,7 @@ class Comm:
     @pairs_inline
     def exchange(self):
         prop_list = [self.sim.property(p) for p in ['mass', 'position', 'velocity']]
-        for d in For(self.sim, 0, self.sim.ndims()):
+        for d in range(self.sim.ndims()):
             DetermineGhostParticles(self, d, 0.0)
             PackGhostParticles(self, prop_list)
             RemoveExchangedParticles_part1(self)
@@ -66,7 +68,7 @@ class CommunicateSizes(Lowerable):
 
     @pairs_inline
     def lower(self):
-        Call_Void(self.sim, "pairs->communicateSizes", [self.nsend, self.nrecv])
+        Call_Void(self.sim, "pairs->communicateSizes", [self.comm.nsend, self.comm.nrecv])
 
 
 class CommunicateData(Lowerable):
@@ -77,8 +79,9 @@ class CommunicateData(Lowerable):
 
     @pairs_inline
     def lower(self):
-        elem_size = sum([self.sim.ndims() if p.type() == Types.Vector else 1])
-        Call_Void(self.sim, "pairs->communicateData", [self.send_buffer, self.nsend, self.recv_buffer, self.nrecv, elem_size])
+        elem_size = sum([self.sim.ndims() if p.type() == Types.Vector else 1 for p in self.prop_list])
+        Call_Void(self.sim, "pairs->communicateData", [self.comm.send_buffer, self.comm.nsend,
+                                                       self.comm.recv_buffer, self.comm.nrecv, elem_size])
 
 
 class DetermineGhostParticles(Lowerable):
@@ -101,10 +104,10 @@ class DetermineGhostParticles(Lowerable):
         nb_rank_id = 0
         nsend_all.set(0)
         for i, _, pbc in self.comm.dom_part.ghost_particles(self.step, self.sim.position(), self.spacing):
-            n = AtomicAdd(self.sim, nsend_all, 1)
-            send_map[n].set(i)
+            next_idx = AtomicAdd(self.sim, nsend_all, 1)
+            send_map[next_idx].set(i)
             for d in range(self.sim.ndims()):
-                send_mult[n][d].set(pbc[d])
+                send_mult[next_idx][d].set(pbc[d])
 
             nsend[nb_rank_id].add(1)
             nb_rank_id += 1
@@ -130,20 +133,22 @@ class PackGhostParticles(Lowerable):
 
         for i in For(self.sim, 0, self.comm.nsend):
             p_offset = 0
+            m = send_map[i]
+            buffer_index = i * elems_per_particle
             for p in self.prop_list:
                 if p.type() == Types.Vector:
                     for d in range(self.sim.ndims()):
-                        src = p[send_map[i]][d]
+                        src = p[m][d]
                         if p == self.sim.position():
                             src += send_mult[i][d] * self.sim.grid.length(d)
 
-                        send_buffer[i * elems_per_particle + p_offset + d].set(src)
+                        send_buffer[buffer_index][p_offset + d].set(src)
 
                     p_offset += self.sim.ndims()
 
                 else:
                     cast_fn = lambda x: Cast(self.sim, x, Types.Double) if p.type() != Types.Double else x
-                    send_buffer[i * elems_per_particle + p_offset].set(cast_fn(p[send_map[i]]))
+                    send_buffer[buffer_index][p_offset].set(cast_fn(p[m]))
                     p_offset += 1
 
             
@@ -166,16 +171,17 @@ class UnpackGhostParticles(Lowerable):
 
         for i in For(self.sim, 0, self.comm.nrecv):
             p_offset = 0
+            buffer_index = i * elems_per_particle
             for p in self.prop_list:
                 if p.type() == Types.Vector:
                     for d in range(self.sim.ndims()):
-                        p[nlocal + i][d].set(recv_buffer[i * elems_per_particle + p_offset + d])
+                        p[nlocal + i][d].set(recv_buffer[buffer_index][p_offset + d])
                         
                     p_offset += self.sim.ndims()
 
                 else:
                     cast_fn = lambda x: Cast(self.sim, x, p.type()) if p.type() != Types.Double else x
-                    p[nlocal + i].set(cast_fn(recv_buffer[i * elems_per_particle + p_offset]))
+                    p[nlocal + i].set(cast_fn(recv_buffer[buffer_index][p_offset]))
                     p_offset += 1
 
 
@@ -187,8 +193,8 @@ class RemoveExchangedParticles_part1(Lowerable):
 
     @pairs_host_block
     def lower(self):
-        send_pos = self.sim.add_temp_var(self.sim.nparticles)
         self.sim.module_name("remove_exchanged_particles_pt1")
+        send_pos = self.sim.add_temp_var(self.sim.nparticles)
         for i in For(self.sim, 0, self.comm.nsend):
             for is_local in Branch(self.sim, self.comm.send_map[i] < self.sim.nlocal - self.comm.nsend):
                 if is_local:
diff --git a/src/pairs/sim/domain_partitioning.py b/src/pairs/sim/domain_partitioning.py
index af3c54ca42e6384e315f5c4e9bb27dca8f9c2026..73a9a72fb8e5bffb22710543b66bec20011ebb5c 100644
--- a/src/pairs/sim/domain_partitioning.py
+++ b/src/pairs/sim/domain_partitioning.py
@@ -20,10 +20,10 @@ class DimensionRanges:
     def ghost_particles(self, step, position, offset=0.0):
         for i in For(self.sim, 0, self.sim.nlocal + self.sim.nghost):
             j = step * 2 + 0
-            for _ in Filter(self.sim, position < self.subdom[j] + offset):
+            for _ in Filter(self.sim, position[i][step] < self.subdom[j] + offset):
                 yield i, self.neighbor_ranks[j], [0 if d != step else self.pbc[j] for d in range(self.sim.ndims())]
 
         for i in For(self.sim, 0, self.sim.nlocal + self.sim.nghost):
             j = step * 2 + 1
-            for _ in Filter(self.sim, position > self.subdom[j] - offset):
+            for _ in Filter(self.sim, position[i][step] > self.subdom[j] - offset):
                 yield i, self.neighbor_ranks[j], [0 if d != step else self.pbc[j] for d in range(self.sim.ndims())]
diff --git a/src/pairs/sim/simulation.py b/src/pairs/sim/simulation.py
index f5df6d3f91b0d2dd09901b02cbf1f9bb9c4085af..a7d9edf6659aad7138c4cc97312c297bb6582a0d 100644
--- a/src/pairs/sim/simulation.py
+++ b/src/pairs/sim/simulation.py
@@ -53,6 +53,7 @@ class Simulation:
         self._check_properties_resize = False
         self._resizes_to_check = {}
         self._module_name = None
+        self._module_temps = []
         self.dims = dims
         self.ntimesteps = timesteps
         self.expr_id = 0
@@ -64,7 +65,8 @@ class Simulation:
 
     def add_module(self, module):
         assert isinstance(module, Module), "add_module(): Given parameter is not of type Module!"
-        self.module_list.append(module)
+        if module.name not in [m.name for m in self.module_list]:
+            self.module_list.append(module)
 
     def modules(self):
         sorted_mods = []
@@ -122,7 +124,9 @@ class Simulation:
         return self.vars.add(var_name, var_type, init_value)
 
     def add_temp_var(self, init_value):
-        return self.vars.add_temp(init_value)
+        var = self.vars.add_temp(init_value)
+        self._module_temps.append(var)
+        return var
 
     def add_symbol(self, sym_type):
         return Symbol(self, sym_type)
@@ -169,6 +173,7 @@ class Simulation:
 
     def module_name(self, name):
         self._module_name = name
+        self._module_temps = []
 
     def check_properties_resize(self):
         self._check_properties_resize = True
@@ -186,7 +191,8 @@ class Simulation:
                 block=Block(self, self._block),
                 resizes_to_check=self._resizes_to_check,
                 check_properties_resize=self._check_properties_resize,
-                run_on_device=run_on_device))
+                run_on_device=run_on_device,
+                temps=self._module_temps))
 
     def add_statement(self, stmt):
         if not self.scope: