diff --git a/runtime/domain/regular_6d_stencil.cpp b/runtime/domain/regular_6d_stencil.cpp
index 4e7631f071d65a8197f98d54789eee3f54bca4f4..f8f2472877b8383fbfe986104ac2df479f1c412e 100644
--- a/runtime/domain/regular_6d_stencil.cpp
+++ b/runtime/domain/regular_6d_stencil.cpp
@@ -118,7 +118,6 @@ void Regular6DStencil::communicateData(
     int dim, int elem_size,
     const real_t *send_buf, const int *send_offsets, const int *nsend,
     real_t *recv_buf, const int *recv_offsets, const int *nrecv) {
-    //std::cout << "communicateData" << std::endl;
 
     const real_t *send_prev = &send_buf[send_offsets[dim * 2 + 0] * elem_size];
     const real_t *send_next = &send_buf[send_offsets[dim * 2 + 1] * elem_size];
@@ -126,10 +125,6 @@ void Regular6DStencil::communicateData(
     real_t *recv_next = &recv_buf[recv_offsets[dim * 2 + 1] * elem_size];
 
     if(prev[dim] != rank) {
-        //std::cout << rank << ": send " << nsend[dim * 2 + 0] << " elems to " << prev[dim] << ", recv " << nrecv[dim * 2 + 0] << " elems from " << next[dim] << std::endl;
-        //MPI_Send(send_prev, nsend[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0, MPI_COMM_WORLD);
-        //MPI_Recv(recv_prev, nrecv[dim * 2 + 0] * elem_size, MPI_DOUBLE, next[dim], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-
         MPI_Sendrecv(
             send_prev, nsend[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0,
             recv_prev, nrecv[dim * 2 + 0] * elem_size, MPI_DOUBLE, next[dim], 0,
@@ -142,10 +137,6 @@ void Regular6DStencil::communicateData(
     }
 
     if(next[dim] != rank) {
-        //std::cout << rank << ": send " << nsend[dim * 2 + 1] << " elems to " << next[dim] << ", recv " << nrecv[dim * 2 + 1] << " elems from " << prev[dim] << std::endl;
-        //MPI_Send(send_next, nsend[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0, MPI_COMM_WORLD);
-        //MPI_Recv(recv_next, nrecv[dim * 2 + 1] * elem_size, MPI_DOUBLE, prev[dim], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-
         MPI_Sendrecv(
             send_next, nsend[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0,
             recv_next, nrecv[dim * 2 + 1] * elem_size, MPI_DOUBLE, prev[dim], 0,
@@ -158,4 +149,41 @@ void Regular6DStencil::communicateData(
     }
 }
 
+void Regular6DStencil::communicateAllData(
+    int ndims, int elem_size,
+    const real_t *send_buf, const int *send_offsets, const int *nsend,
+    real_t *recv_buf, const int *recv_offsets, const int *nrecv) {
+
+    for(int d = 0; d < ndims; d++) {
+        const real_t *send_prev = &send_buf[send_offsets[d * 2 + 0] * elem_size];
+        const real_t *send_next = &send_buf[send_offsets[d * 2 + 1] * elem_size];
+        real_t *recv_prev = &recv_buf[recv_offsets[d * 2 + 0] * elem_size];
+        real_t *recv_next = &recv_buf[recv_offsets[d * 2 + 1] * elem_size];
+
+        if(prev[d] != rank) {
+            MPI_Sendrecv(
+                send_prev, nsend[d * 2 + 0] * elem_size, MPI_DOUBLE, prev[d], 0,
+                recv_prev, nrecv[d * 2 + 0] * elem_size, MPI_DOUBLE, next[d], 0,
+                MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+        } else {
+            for(int i = 0; i < nsend[d * 2 + 0] * elem_size; i++) {
+                recv_prev[i] = send_prev[i];
+            }
+        }
+
+        if(next[d] != rank) {
+            MPI_Sendrecv(
+                send_next, nsend[d * 2 + 1] * elem_size, MPI_DOUBLE, next[d], 0,
+                recv_next, nrecv[d * 2 + 1] * elem_size, MPI_DOUBLE, prev[d], 0,
+                MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+        } else {
+            for(int i = 0; i < nsend[d * 2 + 1] * elem_size; i++) {
+                recv_next[i] = send_next[i];
+            }
+        }
+    }
+}
+
 }
diff --git a/runtime/domain/regular_6d_stencil.hpp b/runtime/domain/regular_6d_stencil.hpp
index 1c12ff45de9a2a1a69b1067cdc42d4c2e995c338..8a43b2d4954bc1fae4eff570a2972a8484dcf461 100644
--- a/runtime/domain/regular_6d_stencil.hpp
+++ b/runtime/domain/regular_6d_stencil.hpp
@@ -54,6 +54,11 @@ public:
         int dim, int elem_size,
         const real_t *send_buf, const int *send_offsets, const int *nsend,
         real_t *recv_buf, const int *recv_offsets, const int *nrecv);
+
+    void communicateAllData(
+        int ndims, int elem_size,
+        const real_t *send_buf, const int *send_offsets, const int *nsend,
+        real_t *recv_buf, const int *recv_offsets, const int *nrecv);
 };
 
 }
diff --git a/runtime/pairs.cpp b/runtime/pairs.cpp
index 9ec1f23a2bfc64eb2665d99a7625bcd731d9618a..93c714c3fc99a619fc28dca42213fda086c3e43d 100644
--- a/runtime/pairs.cpp
+++ b/runtime/pairs.cpp
@@ -305,7 +305,6 @@ void PairsSimulation::communicateSizes(int dim, const int *send_sizes, int *recv
     array_flags->setHostFlag(nrecv_id);
     array_flags->clearDeviceFlag(nrecv_id);
     this->getDomainPartitioner()->communicateSizes(dim, send_sizes, recv_sizes);
-    //PAIRS_DEBUG("send_sizes=[%d, %d], recv_sizes=[%d, %d]\n", send_sizes[dim * 2 + 0], send_sizes[dim * 2 + 1], recv_sizes[dim * 2 + 0], recv_sizes[dim * 2 + 1]);
 }
 
 void PairsSimulation::communicateData(
@@ -342,59 +341,42 @@ void PairsSimulation::communicateData(
         dim, elem_size, send_buf, send_offsets, nsend, recv_buf, recv_offsets, nrecv);
 
     copyArrayToDevice(recv_buf_id, Ignore, nrecv_all * elem_size * sizeof(real_t));
+}
 
-    /*
-    // Debug messages
-    const int elems_to_print = 5;
-
-    // Send buffer debug
-    for(int i = 0; i < 2; i++) {
-        int nsnd = nsend[dim * 2 + i];
-
-        PAIRS_DEBUG("send_buf=[");
-        for(int j = 0; j < MIN(elems_to_print, nsnd); j++) {
-            for(int k = 0; k < elem_size; k++) {
-                PAIRS_DEBUG("%f,", send_buf[(send_offsets[dim * 2 + i] + j) * elem_size + k]);
-            }
-        }
+void PairsSimulation::communicateAllData(
+    int ndims, int elem_size,
+    const real_t *send_buf, const int *send_offsets, const int *nsend,
+    real_t *recv_buf, const int *recv_offsets, const int *nrecv) {
 
-        if(elems_to_print * 2 < nsnd) {
-            PAIRS_DEBUG("\b ... ");
-        }
+    auto send_buf_id = getArrayByHostPointer(send_buf).getId();
+    auto recv_buf_id = getArrayByHostPointer(recv_buf).getId();
+    auto send_offsets_id = getArrayByHostPointer(send_offsets).getId();
+    auto recv_offsets_id = getArrayByHostPointer(recv_offsets).getId();
+    auto nsend_id = getArrayByHostPointer(nsend).getId();
+    auto nrecv_id = getArrayByHostPointer(nrecv).getId();
 
-        for(int j = MAX(elems_to_print, nsnd - elems_to_print); j < nsnd; j++) {
-            for(int k = 0; k < elem_size; k++) {
-                PAIRS_DEBUG("%f,", send_buf[(send_offsets[dim * 2 + i] + j) * elem_size + k]);
-            }
-        }
+    copyArrayToHost(send_offsets_id, ReadOnly);
+    copyArrayToHost(recv_offsets_id, ReadOnly);
+    copyArrayToHost(nsend_id, ReadOnly);
+    copyArrayToHost(nrecv_id, ReadOnly);
 
-        PAIRS_DEBUG("\b]\n");
+    int nsend_all = 0;
+    int nrecv_all = 0;
+    for(int d = 0; d <= ndims; d++) {
+        nsend_all += nsend[d * 2 + 0];
+        nsend_all += nsend[d * 2 + 1];
+        nrecv_all += nrecv[d * 2 + 0];
+        nrecv_all += nrecv[d * 2 + 1];
     }
 
-    // Receive buffer debug
-    for(int i = 0; i < 2; i++) {
-        int nrec = nrecv[dim * 2 + i];
-
-        PAIRS_DEBUG("recv_buf=[");
-        for(int j = 0; j < MIN(elems_to_print, nrec); j++) {
-            for(int k = 0; k < elem_size; k++) {
-                PAIRS_DEBUG("%f,", recv_buf[(recv_offsets[dim * 2 + i] + j) * elem_size + k]);
-            }
-        }
-
-        if(elems_to_print * 2 < nrec) {
-            PAIRS_DEBUG("\b ... ");
-        }
+    copyArrayToHost(send_buf_id, Ignore, nsend_all * elem_size * sizeof(real_t));
 
-        for(int j = MAX(elems_to_print, nrec - elems_to_print); j < nrec; j++) {
-            for(int k = 0; k < elem_size; k++) {
-                PAIRS_DEBUG("%f,", recv_buf[(recv_offsets[dim * 2 + i] + j) * elem_size + k]);
-            }
-        }
+    array_flags->setHostFlag(recv_buf_id);
+    array_flags->clearDeviceFlag(recv_buf_id);
+    this->getDomainPartitioner()->communicateAllData(
+        ndims, elem_size, send_buf, send_offsets, nsend, recv_buf, recv_offsets, nrecv);
 
-        PAIRS_DEBUG("\b]\n");
-    }
-    */
+    copyArrayToDevice(recv_buf_id, Ignore, nrecv_all * elem_size * sizeof(real_t));
 }
 
 void PairsSimulation::fillCommunicationArrays(int *neighbor_ranks, int *pbc, real_t *subdom) {
diff --git a/runtime/pairs.hpp b/runtime/pairs.hpp
index ba987e2ad510c6c67e164e7bae4349dc8120cdba..0cfbefb78e2d3d5fc73513c18fd889a80c36a0bf 100644
--- a/runtime/pairs.hpp
+++ b/runtime/pairs.hpp
@@ -229,11 +229,17 @@ public:
 
     Regular6DStencil *getDomainPartitioner() { return dom_part; }
     void communicateSizes(int dim, const int *send_sizes, int *recv_sizes);
+
     void communicateData(
         int dim, int elem_size,
         const real_t *send_buf, const int *send_offsets, const int *nsend,
         real_t *recv_buf, const int *recv_offsets, const int *nrecv);
 
+    void communicateAllData(
+        int ndims, int elem_size,
+        const real_t *send_buf, const int *send_offsets, const int *nsend,
+        real_t *recv_buf, const int *recv_offsets, const int *nrecv);
+
     void fillCommunicationArrays(int neighbor_ranks[], int pbc[], real_t subdom[]);
 
     // Device functions
diff --git a/src/pairs/sim/comm.py b/src/pairs/sim/comm.py
index 242c887531792b81385762f94a0421563aaa7ce1..3f552ed8735e36922a8be2cb701642d9187e3eb7 100644
--- a/src/pairs/sim/comm.py
+++ b/src/pairs/sim/comm.py
@@ -41,18 +41,31 @@ class Comm:
     def synchronize(self):
         # Every property that is not constant across timesteps and have neighbor accesses during any
         # interaction kernel (i.e. property[j] in force calculation kernel)
-        prop_list = [self.sim.property(p) for p in ['position', 'linear_velocity', 'angular_velocity'] if self.sim.property(p) is not None]
-        for step in range(self.dom_part.number_of_steps()):
-            PackGhostParticles(self, step, prop_list)
-            CommunicateData(self, step, prop_list)
-            UnpackGhostParticles(self, step, prop_list)
+        prop_names = ['position', 'linear_velocity', 'angular_velocity']
+        prop_list = [ self.sim.property(p) for p in prop_names if self.sim.property(p) is not None]
+
+        PackAllGhostParticles(self, prop_list)
+        CommunicateAllData(self, prop_list)
+        UnpackAllGhostParticles(self, prop_list)
 
     @pairs_inline
     def borders(self):
         # Every property that has neighbor accesses during any interaction kernel (i.e. property[j]
         # exists in any force calculation kernel)
-        # We ignore normal because there should be no halfspace ghosts
-        prop_list = [self.sim.property(p) for p in ['uid', 'type', 'mass', 'radius', 'position', 'linear_velocity', 'angular_velocity', 'shape'] if self.sim.property(p) is not None]
+        # We ignore normal because there should be no ghost half-spaces
+        prop_names = [
+            'uid',
+            'type',
+            'mass',
+            'radius',
+            'position',
+            'linear_velocity',
+            'angular_velocity',
+            'shape'
+        ]
+
+        prop_list = [self.sim.property(p) for p in prop_names if self.sim.property(p) is not None]
+
         Assign(self.sim, self.nsend_all, 0)
         Assign(self.sim, self.sim.nghost, 0)
 
@@ -75,12 +88,15 @@ class Comm:
             PackGhostParticles(self, step, prop_list)
             CommunicateData(self, step, prop_list)
             UnpackGhostParticles(self, step, prop_list)
-            Assign(self.sim, self.sim.nghost, self.sim.nghost + sum([self.nrecv[j] for j in self.dom_part.step_indexes(step)]))
+
+            step_nrecv = sum([self.nrecv[j] for j in self.dom_part.step_indexes(step)])
+            Assign(self.sim, self.sim.nghost, self.sim.nghost + step_nrecv)
 
     @pairs_inline
     def exchange(self):
         # Every property except volatiles
         prop_list = self.sim.properties.non_volatiles()
+
         for step in range(self.dom_part.number_of_steps()):
             Assign(self.sim, self.nsend_all, 0)
             Assign(self.sim, self.sim.nghost, 0)
@@ -130,9 +146,31 @@ class CommunicateData(Lowerable):
     @pairs_inline
     def lower(self):
         elem_size = sum([Types.number_of_elements(self.sim, p.type()) for p in self.prop_list])
-        Call_Void(self.sim, "pairs->communicateData", [self.step, elem_size,
-                                                       self.comm.send_buffer, self.comm.send_offsets, self.comm.nsend,
-                                                       self.comm.recv_buffer, self.comm.recv_offsets, self.comm.nrecv])
+
+        Call_Void(self.sim,
+                  "pairs->communicateData",
+                  [self.step, elem_size,
+                   self.comm.send_buffer, self.comm.send_offsets, self.comm.nsend,
+                   self.comm.recv_buffer, self.comm.recv_offsets, self.comm.nrecv])
+
+
+class CommunicateAllData(Lowerable):
+    def __init__(self, comm, prop_list):
+        super().__init__(comm.sim)
+        self.comm = comm
+        self.prop_list = prop_list
+        self.sim.add_statement(self)
+
+    @pairs_inline
+    def lower(self):
+        elem_size = sum([Types.number_of_elements(self.sim, p.type()) for p in self.prop_list])
+
+        Call_Void(
+            self.sim,
+            "pairs->communicateAllData",
+            [self.comm.dom_part.number_of_steps(), elem_size,
+             self.comm.send_buffer, self.comm.send_offsets, self.comm.nsend,
+             self.comm.recv_buffer, self.comm.recv_offsets, self.comm.nrecv])
 
 
 class DetermineGhostParticles(Lowerable):
@@ -281,6 +319,82 @@ class UnpackGhostParticles(Lowerable):
                     p_offset += 1
 
 
+class PackAllGhostParticles(Lowerable):
+    def __init__(self, comm, prop_list):
+        super().__init__(comm.sim)
+        self.comm = comm
+        self.prop_list = prop_list
+        self.sim.add_statement(self)
+
+    def get_elems_per_particle(self):
+        return sum([Types.number_of_elements(self.sim, p.type()) for p in self.prop_list])
+
+    @pairs_device_block
+    def lower(self):
+        send_buffer = self.comm.send_buffer
+        send_buffer.set_stride(1, self.get_elems_per_particle())
+        send_map = self.comm.send_map
+        send_mult = self.comm.send_mult
+        self.sim.module_name(f"pack_all_ghost_particles" + "_".join([str(p.id()) for p in self.prop_list]))
+
+        for i in For(self.sim, 0, self.comm.nsend_all):
+            p_offset = 0
+            m = send_map[i]
+            for p in self.prop_list:
+                if not Types.is_scalar(p.type()):
+                    nelems = Types.number_of_elements(self.sim, p.type())
+                    for e in range(nelems):
+                        src = p[m][e]
+                        if p == self.sim.position():
+                            src += send_mult[i][e] * self.sim.grid.length(e)
+
+                        Assign(self.sim, send_buffer[i][p_offset + e], src)
+
+                    p_offset += nelems
+
+                else:
+                    cast_fn = lambda x: Cast(self.sim, x, Types.Real) if p.type() != Types.Real else x
+                    Assign(self.sim, send_buffer[i][p_offset], cast_fn(p[m]))
+                    p_offset += 1
+
+
+class UnpackAllGhostParticles(Lowerable):
+    def __init__(self, comm, prop_list):
+        super().__init__(comm.sim)
+        self.comm = comm
+        self.prop_list = prop_list
+        self.sim.add_statement(self)
+
+    def get_elems_per_particle(self):
+        return sum([Types.number_of_elements(self.sim, p.type()) for p in self.prop_list])
+
+    @pairs_device_block
+    def lower(self):
+        nlocal = self.sim.nlocal
+        dom_part = self.comm.dom_part
+        recv_buffer = self.comm.recv_buffer
+        recv_buffer.set_stride(1, self.get_elems_per_particle())
+        self.sim.module_name(f"unpack_all_ghost_particles" + "_".join([str(p.id()) for p in self.prop_list]))
+
+        nrecv_size = sum([len(dom_part.step_indexes(s)) for s in range(dom_part.number_of_steps())])
+        nrecv_all = sum([self.comm.nrecv[j] for j in range(nrecv_size)])
+
+        for i in For(self.sim, 0, nrecv_all):
+            p_offset = 0
+            for p in self.prop_list:
+                if not Types.is_scalar(p.type()):
+                    nelems = Types.number_of_elements(self.sim, p.type())
+                    for e in range(nelems):
+                        Assign(self.sim, p[nlocal + i][e], recv_buffer[i][p_offset + e])
+
+                    p_offset += nelems
+
+                else:
+                    cast_fn = lambda x: Cast(self.sim, x, p.type()) if p.type() != Types.Real else x
+                    Assign(self.sim, p[nlocal + i], cast_fn(recv_buffer[i][p_offset]))
+                    p_offset += 1
+
+
 class RemoveExchangedParticles_part1(Lowerable):
     def __init__(self, comm):
         super().__init__(comm.sim)