diff --git a/runtime/boundary_weights.hpp b/runtime/boundary_weights.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..165a62da6d6405838bcc27182773b6dd540eefa4
--- /dev/null
+++ b/runtime/boundary_weights.hpp
@@ -0,0 +1,61 @@
+#pragma once
+
+#include <iostream>
+#include <cstring>
+#include <fstream>
+#include <sstream>
+//---
+#include "pairs.hpp"
+#include "pairs_common.hpp"
+#include "gen/property_interface.hpp"
+
+namespace pairs {
+
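+// Count local particles (computational weight) and ghost particles (communication
+// weight) inside a block's AABB; used by the load balancer to weigh blocks.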
+inline void compute_boundary_weights(
+    PairsSimulation *ps,
+    real_t xmin, real_t xmax, real_t ymin, real_t ymax, real_t zmin, real_t zmax,
+    int *comp_weight, int *comm_weight) {
+
+    const int particle_capacity = ps->getParticleCapacity();
+    const int nlocal = ps->getNumberOfLocalParticles();
+    const int nghost = ps->getNumberOfGhostParticles();
+    auto position_prop = ps->getPropertyByName("position");
+    // Host-side pointer to the position data (assumes the runtime Property type
+    // exposes getHostPointer(); adjust to the actual accessor if it differs).
+    real_t *position_ptr = static_cast<real_t*>(position_prop.getHostPointer());
+
+    //ps->copyPropertyToDevice(position_prop, Ignore);
+
+    *comp_weight = 0;
+    *comm_weight = 0;
+
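+    // Local particles inside the AABB contribute to the computational weight.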
+    for(int i = 0; i < nlocal; i++) {
+        real_t pos_x = pairs_interface::getPosition(position_ptr, i, 0, particle_capacity);
+        real_t pos_y = pairs_interface::getPosition(position_ptr, i, 1, particle_capacity);
+        real_t pos_z = pairs_interface::getPosition(position_ptr, i, 2, particle_capacity);
+
+        if( pos_x > xmin && pos_x <= xmax &&
+            pos_y > ymin && pos_y <= ymax &&
+            pos_z > zmin && pos_z <= zmax) {
+                (*comp_weight)++;
+        }
+    }
+
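+    // Ghost particles inside the AABB contribute to the communication weight.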
+    for(int i = nlocal; i < nlocal + nghost; i++) {
+        real_t pos_x = pairs_interface::getPosition(position_ptr, i, 0, particle_capacity);
+        real_t pos_y = pairs_interface::getPosition(position_ptr, i, 1, particle_capacity);
+        real_t pos_z = pairs_interface::getPosition(position_ptr, i, 2, particle_capacity);
+
+        if( pos_x > xmin && pos_x <= xmax &&
+            pos_y > ymin && pos_y <= ymax &&
+            pos_z > zmin && pos_z <= zmax) {
+                (*comm_weight)++;
+        }
+    }
+}
+
+}
diff --git a/runtime/domain/block_forest.cpp b/runtime/domain/block_forest.cpp
index af30587cedd075b475353630a33060c625fbfdbd..8da11deaa0568af1a9b56c5e476c7072c7456825 100644
--- a/runtime/domain/block_forest.cpp
+++ b/runtime/domain/block_forest.cpp
@@ -12,6 +12,7 @@
 #include <pe/amr/weight_assignment/MetisAssignmentFunctor.h>
 #include <pe/amr/weight_assignment/WeightAssignmentFunctor.h>
 //---
+#include "../boundary_weights.hpp"
 #include "../pairs_common.hpp"
 #include "../devices/device.hpp"
 #include "regular_6d_stencil.hpp"
@@ -21,6 +22,10 @@ namespace pairs {
 
 void BlockForest::updateNeighborhood() {
     auto me = mpi::MPIManager::instance()->rank();
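+    // Rebuild the neighbor-rank and AABB lists from scratch on every update.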
+    ranks.clear();
+    naabbs.clear();
+    aabbs.clear();
     this->nranks = 0;
     this->total_aabbs = 0;
 
@@ -50,23 +54,19 @@ void BlockForest::updateNeighborhood() {
         }
     }
 
-    vec_ranks.clear();
-    vec_naabbs.clear();
-    vec_aabbs.clear();
-
     for(auto& nbh: neighborhood) {
         auto rank = nbh.first;
         auto aabb_list = nbh.second;
-        vec_ranks.push_back((int) rank);
-        vec_naabbs.push_back((int) aabb_list.size());
+        ranks.push_back((int) rank);
+        naabbs.push_back((int) aabb_list.size());
 
         for(auto &aabb: aabb_list) {
-            vec_aabbs.push_back(aabb.xMin());
-            vec_aabbs.push_back(aabb.xMax());
-            vec_aabbs.push_back(aabb.yMin());
-            vec_aabbs.push_back(aabb.yMax());
-            vec_aabbs.push_back(aabb.zMin());
-            vec_aabbs.push_back(aabb.zMax());
+            aabbs.push_back(aabb.xMin());
+            aabbs.push_back(aabb.xMax());
+            aabbs.push_back(aabb.yMin());
+            aabbs.push_back(aabb.yMax());
+            aabbs.push_back(aabb.zMin());
+            aabbs.push_back(aabb.zMax());
             this->total_aabbs++;
         }
 
@@ -77,7 +77,7 @@ void BlockForest::updateNeighborhood() {
 
 
 void BlockForest::copyRuntimeArray(const std::string& name, void *dest, const int size) {
-    void *src = name.compare('ranks') ? vec_ranks.data() :
-                name.compare('naabbs') ? vec_naabbs.data() :
-                name.compare('rank_offsets') ? offsets :
-                name.compare('pbc') ? vec_pbc.data() :
+    void *src = !name.compare("ranks") ? ranks.data() :
+                !name.compare("naabbs") ? naabbs.data() :
+                !name.compare("rank_offsets") ? offsets :
+                !name.compare("pbc") ? vec_pbc.data() :
@@ -89,33 +89,7 @@ void BlockForest::copyRuntimeArray(const std::string& name, void *dest, const in
     std::memcpy(dest, src, size * tsize);
 }
 
-/*
-  extern fn md_compute_boundary_weights(
-    xmin: real_t, xmax: real_t, ymin: real_t, ymax: real_t, zmin: real_t, zmax: real_t,
-    computational_weight: &mut u32, communication_weight: &mut u32) -> () {
-
-    let grid = grid_;
-    let particle = make_particle(grid, array_dev, ParticleDataLayout(), null_layout());
-    let sum = @|a: i32, b: i32| { a + b };
-    let aabb = AABB {
-        xmin: xmin,
-        xmax: xmax,
-        ymin: ymin,
-        ymax: ymax,
-        zmin: zmin,
-        zmax: zmax
-    };
-
-    *computational_weight = reduce_i32(grid.nparticles, 0, sum, |i| {
-        select(is_within_domain(particle.get_position(i), aabb), 1, 0)
-    }) as u32;
-
-    *communication_weight = reduce_i32(grid.nghost, 0, sum, |i| {
-        select(is_within_domain(particle.get_position(grid.nparticles + i), aabb), 1, 0)
- }) as u32;
- */
-
-void BlockForest::updateWeights(real_t *position, int nparticles) {
+void BlockForest::updateWeights(PairsSimulation *ps, int nparticles) {
     mpi::BufferSystem bs(mpi::MPIManager::instance()->comm(), 756);
 
     info.clear();
@@ -124,7 +98,8 @@ void BlockForest::updateWeights(real_t *position, int nparticles) {
         auto aabb = block->getAABB();
         auto& block_info = info[block->getId()];
 
-        pairs->computeBoundaryWeights(
+        pairs::compute_boundary_weights(
+            ps,
             aabb.xMin(), aabb.xMax(), aabb.yMin(), aabb.yMax(), aabb.zMin(), aabb.zMax(),
             &(block_info.computationalWeight), &(block_info.communicationWeight));
 
@@ -133,7 +108,8 @@ void BlockForest::updateWeights(real_t *position, int nparticles) {
             const auto b_aabb = forest->getAABBFromBlockId(b_id);
             auto& b_info = info[b_id];
 
-            pairs->computeBoundaryWeights(
+            pairs::compute_boundary_weights(
+                ps,
                 b_aabb.xMin(), b_aabb.xMax(), b_aabb.yMin(), b_aabb.yMax(), b_aabb.zMin(), b_aabb.zMax(),
                 &(b_info.computationalWeight), &(b_info.communicationWeight));
         }
@@ -352,20 +328,20 @@ bool BlockForest::isWithinSubdomain(real_t x, real_t y, real_t z) {
     return false;
 }
 
-void BlockForest::communicateSizes(int dim, const int *send_sizes, int *recv_sizes) {
-    if(prev[dim] != rank) {
-        MPI_Send(&send_sizes[dim * 2 + 0], 1, MPI_INT, prev[dim], 0, MPI_COMM_WORLD);
-        MPI_Recv(&recv_sizes[dim * 2 + 0], 1, MPI_INT, next[dim], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    } else {
-        recv_sizes[dim * 2 + 0] = send_sizes[dim * 2 + 0];
-    }
+void BlockForest::communicateSizes(int dim, const int *nsend, int *nrecv) {
+    std::vector<MPI_Request> send_requests(ranks.size(), MPI_REQUEST_NULL);
+    std::vector<MPI_Request> recv_requests(ranks.size(), MPI_REQUEST_NULL);
+    size_t nranks = 0;
 
-    if(next[dim] != rank) {
-        MPI_Send(&send_sizes[dim * 2 + 1], 1, MPI_INT, next[dim], 0, MPI_COMM_WORLD);
-        MPI_Recv(&recv_sizes[dim * 2 + 1], 1, MPI_INT, prev[dim], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    } else {
-        recv_sizes[dim * 2 + 1] = send_sizes[dim * 2 + 1];
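+    // Post one nonblocking send/recv pair per neighbor rank, then wait for all.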
+    for(auto neigh_rank: ranks) {
+        MPI_Irecv(&nrecv[nranks], 1, MPI_INT, neigh_rank, 0, MPI_COMM_WORLD, &recv_requests[nranks]);
+        MPI_Isend(&nsend[nranks], 1, MPI_INT, neigh_rank, 0, MPI_COMM_WORLD, &send_requests[nranks]);
+        nranks++;
     }
+
+    MPI_Waitall(static_cast<int>(nranks), send_requests.data(), MPI_STATUSES_IGNORE);
+    MPI_Waitall(static_cast<int>(nranks), recv_requests.data(), MPI_STATUSES_IGNORE);
 }
 
 void BlockForest::communicateData(
@@ -373,110 +348,43 @@ void BlockForest::communicateData(
     const real_t *send_buf, const int *send_offsets, const int *nsend,
     real_t *recv_buf, const int *recv_offsets, const int *nrecv) {
 
-    //MPI_Request recv_requests[2];
-    //MPI_Request send_requests[2];
-    const real_t *send_prev = &send_buf[send_offsets[dim * 2 + 0] * elem_size];
-    const real_t *send_next = &send_buf[send_offsets[dim * 2 + 1] * elem_size];
-    real_t *recv_prev = &recv_buf[recv_offsets[dim * 2 + 0] * elem_size];
-    real_t *recv_next = &recv_buf[recv_offsets[dim * 2 + 1] * elem_size];
-
-    if(prev[dim] != rank) {
-        MPI_Sendrecv(
-            send_prev, nsend[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0,
-            recv_prev, nrecv[dim * 2 + 0] * elem_size, MPI_DOUBLE, next[dim], 0,
-            MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-
-        /*
-        MPI_Irecv(
-            recv_prev, nrecv[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0,
-            MPI_COMM_WORLD, &recv_requests[0]);
-
-        MPI_Isend(
-            send_prev, nsend[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0,
-            MPI_COMM_WORLD, &send_requests[0]);
-        */
-    } else {
-        pairs::copy_in_device(recv_prev, send_prev, nsend[dim * 2 + 0] * elem_size * sizeof(real_t));
-    }
-
-    if(next[dim] != rank) {
-        MPI_Sendrecv(
-            send_next, nsend[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0,
-            recv_next, nrecv[dim * 2 + 1] * elem_size, MPI_DOUBLE, prev[dim], 0,
-            MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-
-        /*
-        MPI_Irecv(
-            recv_next, nrecv[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0,
-            MPI_COMM_WORLD, &recv_requests[1]);
-
-        MPI_Isend(
-            send_next, nsend[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0,
-            MPI_COMM_WORLD, &send_requests[1]);
-        */
-    } else {
-        pairs::copy_in_device(recv_next, send_next, nsend[dim * 2 + 1] * elem_size * sizeof(real_t));
-    }
-
-    //MPI_Waitall(2, recv_requests, MPI_STATUSES_IGNORE);
-    //MPI_Waitall(2, send_requests, MPI_STATUSES_IGNORE);
-}
-
-void BlockForest::communicateAllData(
-    int ndims, int elem_size,
-    const real_t *send_buf, const int *send_offsets, const int *nsend,
-    real_t *recv_buf, const int *recv_offsets, const int *nrecv) {
-
-    //std::vector<MPI_Request> send_requests(ndims * 2, MPI_REQUEST_NULL);
-    //std::vector<MPI_Request> recv_requests(ndims * 2, MPI_REQUEST_NULL);
+    std::vector<MPI_Request> send_requests(ranks.size(), MPI_REQUEST_NULL);
+    std::vector<MPI_Request> recv_requests(ranks.size(), MPI_REQUEST_NULL);
+    size_t nranks = 0;
 
-    for (int d = 0; d < ndims; d++) {
-        const real_t *send_prev = &send_buf[send_offsets[d * 2 + 0] * elem_size];
-        const real_t *send_next = &send_buf[send_offsets[d * 2 + 1] * elem_size];
-        real_t *recv_prev = &recv_buf[recv_offsets[d * 2 + 0] * elem_size];
-        real_t *recv_next = &recv_buf[recv_offsets[d * 2 + 1] * elem_size];
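+    // Buffers are laid out contiguously per neighbor rank; nranks walks the
+    // per-rank offsets and counts.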
+    for(auto neigh_rank: ranks) {
+        const real_t *send_ptr = &send_buf[send_offsets[nranks] * elem_size];
+        real_t *recv_ptr = &recv_buf[recv_offsets[nranks] * elem_size];
 
-        if (prev[d] != rank) {
-            MPI_Sendrecv(
-                send_prev, nsend[d * 2 + 0] * elem_size, MPI_DOUBLE, prev[d], 0,
-                recv_prev, nrecv[d * 2 + 0] * elem_size, MPI_DOUBLE, next[d], 0,
-                MPI_COMM_WORLD, MPI_STATUS_IGNORE);
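+        // Remote ranks use nonblocking transfers; the local rank is handled
+        // below with a direct device copy.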
+        if(neigh_rank != rank) {
+            MPI_Irecv(
+                recv_ptr, nrecv[nranks] * elem_size, MPI_DOUBLE, neigh_rank, 0,
+                MPI_COMM_WORLD, &recv_requests[nranks]);
 
-            /*
             MPI_Isend(
-                send_prev, nsend[d * 2 + 0] * elem_size, MPI_DOUBLE, prev[d], 0,
-                MPI_COMM_WORLD, &send_requests[d * 2 + 0]);
+                send_ptr, nsend[nranks] * elem_size, MPI_DOUBLE, neigh_rank, 0,
+                MPI_COMM_WORLD, &send_requests[nranks]);
 
-            MPI_Irecv(
-                recv_prev, nrecv[d * 2 + 0] * elem_size, MPI_DOUBLE, prev[d], 0,
-                MPI_COMM_WORLD, &recv_requests[d * 2 + 0]);
-            */
         } else {
-            pairs::copy_in_device(recv_prev, send_prev, nsend[d * 2 + 0] * elem_size * sizeof(real_t));
+            pairs::copy_in_device(recv_ptr, send_ptr, nsend[nranks] * elem_size * sizeof(real_t));
         }
 
-        if (next[d] != rank) {
-            MPI_Sendrecv(
-                send_next, nsend[d * 2 + 1] * elem_size, MPI_DOUBLE, next[d], 0,
-                recv_next, nrecv[d * 2 + 1] * elem_size, MPI_DOUBLE, prev[d], 0,
-                MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+        nranks++;
+    }
 
-            /*
-            MPI_Isend(
-                send_next, nsend[d * 2 + 1] * elem_size, MPI_DOUBLE, next[d], 0,
-                MPI_COMM_WORLD, &send_requests[d * 2 + 1]);
+    MPI_Waitall(static_cast<int>(nranks), send_requests.data(), MPI_STATUSES_IGNORE);
+    MPI_Waitall(static_cast<int>(nranks), recv_requests.data(), MPI_STATUSES_IGNORE);
+}
 
-            MPI_Irecv(
-                recv_next, nrecv[d * 2 + 1] * elem_size, MPI_DOUBLE, next[d], 0,
-                MPI_COMM_WORLD, &recv_requests[d * 2 + 1]);
-            */
-        } else {
-            pairs::copy_in_device(recv_next, send_next, nsend[d * 2 + 1] * elem_size * sizeof(real_t));
-        }
-    }
+void BlockForest::communicateAllData(
+    int ndims, int elem_size,
+    const real_t *send_buf, const int *send_offsets, const int *nsend,
+    real_t *recv_buf, const int *recv_offsets, const int *nrecv) {
 
-    //MPI_Waitall(ndims * 2, send_requests.data(), MPI_STATUSES_IGNORE);
-    //MPI_Waitall(ndims * 2, recv_requests.data(), MPI_STATUSES_IGNORE);
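+    // Neighbor lists are rank-based rather than per-dimension, so a single
+    // pass covers all dimensions.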
+    this->communicateData(0, elem_size, send_buf, send_offsets, nsend, recv_buf, recv_offsets, nrecv);
 }
 
 }
diff --git a/runtime/pairs.hpp b/runtime/pairs.hpp
index 000e6c47383a2a6996de27d89a15a4bc8702b3b7..ca56b6ec6095cef180269a09aa5cbf73f488915d 100644
--- a/runtime/pairs.hpp
+++ b/runtime/pairs.hpp
@@ -36,13 +36,25 @@ private:
     std::vector<Array> arrays;
     DeviceFlags *prop_flags, *contact_prop_flags, *array_flags;
     Timers<double> *timers;
+    int *nlocal, *nghost;
 
 public:
-    PairsSimulation(int nprops_, int ncontactprops_, int narrays_, DomainPartitioners dom_part_type_) {
+    PairsSimulation(
+        int nprops_,
+        int ncontactprops_,
+        int narrays_,
+        int *nlocal_,
+        int *nghost_,
+        DomainPartitioners dom_part_type_) {
+
         dom_part_type = dom_part_type_;
         prop_flags = new DeviceFlags(nprops_);
         contact_prop_flags = new DeviceFlags(ncontactprops_);
         array_flags = new DeviceFlags(narrays_);
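+        // nlocal/nghost point at counters owned by the generated simulation
+        // code, so the runtime always reads their current values.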
+        nlocal = nlocal_;
+        nghost = nghost_;
         timers = new Timers<double>(1e-6);
     }
 
@@ -60,6 +70,10 @@ public:
        return RuntimeVar<T>(h_ptr); 
     }
 
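+    // Dereferenced on each call so particle-count changes are always visible.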
+    int getNumberOfLocalParticles() const { return *nlocal; }
+    int getNumberOfGhostParticles() const { return *nghost; }
+
     // Arrays
     Array &getArray(array_t id);
     Array &getArrayByName(std::string name);
diff --git a/src/pairs/sim/comm.py b/src/pairs/sim/comm.py
index 3a439d8048517f2b4c8b0cb5023c1942787d71bd..08fb1438ae62ce34d57b2f76435f33db304e6780 100644
--- a/src/pairs/sim/comm.py
+++ b/src/pairs/sim/comm.py
@@ -24,23 +24,23 @@ class Comm:
         self.send_capacity    = sim.add_var('send_capacity', Types.Int32, 200000)
         self.recv_capacity    = sim.add_var('recv_capacity', Types.Int32, 200000)
         self.elem_capacity    = sim.add_var('elem_capacity', Types.Int32, 40)
-        self.neigh_capacity   = sim.add_var('neigh_capacity', Types.Int32, 10)
-        self.nsend            = sim.add_array('nsend', [self.neigh_capacity], Types.Int32)
-        self.send_offsets     = sim.add_array('send_offsets', [self.neigh_capacity], Types.Int32)
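+        # Per-neighbor arrays are sized by the partitioner's rank capacity.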
+        self.nsend            = sim.add_array('nsend', [dom_part.nranks_capacity], Types.Int32)
+        self.send_offsets     = sim.add_array('send_offsets', [dom_part.nranks_capacity], Types.Int32)
         self.send_buffer      = sim.add_array('send_buffer', [self.send_capacity, self.elem_capacity], Types.Real, arr_sync=False)
         self.send_map         = sim.add_array('send_map', [self.send_capacity], Types.Int32, arr_sync=False)
         self.exchg_flag       = sim.add_array('exchg_flag', [sim.particle_capacity], Types.Int32, arr_sync=False)
         self.exchg_copy_to    = sim.add_array('exchg_copy_to', [self.send_capacity], Types.Int32, arr_sync=False)
         self.send_mult        = sim.add_array('send_mult', [self.send_capacity, sim.ndims()], Types.Int32)
-        self.nrecv            = sim.add_array('nrecv', [self.neigh_capacity], Types.Int32)
-        self.recv_offsets     = sim.add_array('recv_offsets', [self.neigh_capacity], Types.Int32)
+        self.nrecv            = sim.add_array('nrecv', [dom_part.nranks_capacity], Types.Int32)
+        self.recv_offsets     = sim.add_array('recv_offsets', [dom_part.nranks_capacity], Types.Int32)
         self.recv_buffer      = sim.add_array('recv_buffer', [self.recv_capacity, self.elem_capacity], Types.Real, arr_sync=False)
         self.recv_map         = sim.add_array('recv_map', [self.recv_capacity], Types.Int32)
         self.recv_mult        = sim.add_array('recv_mult', [self.recv_capacity, sim.ndims()], Types.Int32)
-        self.nsend_contact    = sim.add_array('nsend_contact', [self.neigh_capacity], Types.Int32)
-        self.nrecv_contact    = sim.add_array('nrecv_contact', [self.neigh_capacity], Types.Int32)
-        self.contact_soffsets = sim.add_array('contact_soffsets', [self.neigh_capacity], Types.Int32)
-        self.contact_roffsets = sim.add_array('contact_roffsets', [self.neigh_capacity], Types.Int32)
+        self.nsend_contact    = sim.add_array('nsend_contact', [dom_part.nranks_capacity], Types.Int32)
+        self.nrecv_contact    = sim.add_array('nrecv_contact', [dom_part.nranks_capacity], Types.Int32)
+        self.contact_soffsets = sim.add_array('contact_soffsets', [dom_part.nranks_capacity], Types.Int32)
+        self.contact_roffsets = sim.add_array('contact_roffsets', [dom_part.nranks_capacity], Types.Int32)
 
     @pairs_inline
     def synchronize(self):
diff --git a/src/pairs/sim/domain_partitioning.py b/src/pairs/sim/domain_partitioning.py
index aae6dfc060d45fb2bac129ca169fe9296fd64cd5..1c88c4c4f750457d79ea5df7ee0fccdfe8317ab5 100644
--- a/src/pairs/sim/domain_partitioning.py
+++ b/src/pairs/sim/domain_partitioning.py
@@ -12,10 +12,13 @@ from pairs.sim.lowerable import Lowerable
 
 class DimensionRanges:
     def __init__(self, sim):
-        self.sim            = sim
-        self.neighbor_ranks = sim.add_static_array('neighbor_ranks', [sim.ndims() * 2], Types.Int32)
-        self.pbc            = sim.add_static_array('pbc', [sim.ndims() * 2], Types.Int32)
-        self.subdom         = sim.add_static_array('subdom', [sim.ndims() * 2], Types.Real)
+        self.sim                = sim
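+        # Fixed capacity of one neighbor per face: ndims * 2 = 6 in 3D.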
+        self.nranks             = 6
+        self.nranks_capacity    = self.nranks
+        self.neighbor_ranks     = sim.add_static_array('neighbor_ranks', [sim.ndims() * 2], Types.Int32)
+        self.pbc                = sim.add_static_array('pbc', [sim.ndims() * 2], Types.Int32)
+        self.subdom             = sim.add_static_array('subdom', [sim.ndims() * 2], Types.Real)
 
     def min(self, dim):
         return self.subdom[dim * 2 + 0]