diff --git a/runtime/domain/ParticleDataHanding.hpp b/runtime/domain/ParticleDataHanding.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..879b9ae83a943c94f7482de0d5c3abb0d89d7907
--- /dev/null
+++ b/runtime/domain/ParticleDataHanding.hpp
@@ -0,0 +1,123 @@
+#include <blockforest/BlockForest.h>
+#include <blockforest/BlockDataHandling.h>
+
+namespace walberla {
+
+namespace internal {
+
+class ParticleDeleter {
+    friend bool operator==(const ParticleDeleter& lhs, const ParticleDeleter& rhs);
+
+public:
+    ParticleDeleter(const math::AABB& aabb) : aabb_(aabb) {}
+    ~ParticleDeleter() {}
+
+private:
+    math::AABB aabb_;
+};
+
+inline bool operator==(const ParticleDeleter& lhs, const ParticleDeleter& rhs) {
+    return lhs.aabb_ == rhs.aabb_;
+}
+
+} // namespace internal
+
+class ParticleDataHandling : public blockforest::BlockDataHandling<internal::ParticleDeleter>{
+
+public:
+    ParticleDataHandling() {}
+    virtual ~ParticleDataHandling() {}
+
+    virtual internal::ParticleDeleter *initialize(IBlock *const block) WALBERLA_OVERRIDE {
+        return new internal::ParticleDeleter(block->getAABB());
+    }
+
+    virtual void serialize(IBlock *const block, const BlockDataID& id, mpi::SendBuffer& buffer) WALBERLA_OVERRIDE {
+        serializeImpl(static_cast<Block *const>(block), id, buffer, 0, false);
+    }
+
+    virtual internal::ParticleDeleter* deserialize(IBlock *const block) WALBERLA_OVERRIDE {
+        return initialize(block);
+    }
+
+    virtual void deserialize(IBlock *const block, const BlockDataID& id, mpi::RecvBuffer& buffer) WALBERLA_OVERRIDE {
+        deserializeImpl(block, id, buffer);
+    }
+
+    virtual void serializeCoarseToFine(Block *const block, const BlockDataID& id, mpi::SendBuffer& buffer, const uint_t child)
+        WALBERLA_OVERRIDE {
+        serializeImpl(block, id, buffer, child, true);
+    }
+
+    virtual void serializeFineToCoarse(Block *const block, const BlockDataID& id, mpi::SendBuffer& buffer) WALBERLA_OVERRIDE {
+        serializeImpl(block, id, buffer, 0, false);
+    }
+
+    virtual internal::ParticleDeleter *deserializeCoarseToFine(Block *const block) WALBERLA_OVERRIDE {
+        return initialize(block);
+    }
+
+    virtual internal::ParticleDeleter *deserializeFineToCoarse(Block *const block) WALBERLA_OVERRIDE {
+        return initialize(block);
+    }
+
+    virtual void deserializeCoarseToFine(Block *const block, const BlockDataID& id, mpi::RecvBuffer& buffer) WALBERLA_OVERRIDE {
+        deserializeImpl(block, id, buffer);
+    }
+
+    virtual void deserializeFineToCoarse(Block *const block, const BlockDataID& id, mpi::RecvBuffer& buffer, const uint_t)
+        WALBERLA_OVERRIDE {
+        deserializeImpl(block, id, buffer);
+    }
+
+private:
+    void serializeImpl(Block *const block, const BlockDataID&, mpi::SendBuffer& buffer, const uint_t child, bool check_child) {
+        auto ptr = buffer.allocate<uint_t>();
+        double aabb_check[6];
+        int nparticles;
+
+        if(check_child) {
+            const auto child_id = BlockID(block->getId(), child);
+            const auto child_aabb = block->getForest().getAABBFromBlockId(child_id);
+            aabb_check[0] = child_aabb.xMin();
+            aabb_check[1] = child_aabb.xMax();
+            aabb_check[2] = child_aabb.yMin();
+            aabb_check[3] = child_aabb.yMax();
+            aabb_check[4] = child_aabb.zMin();
+            aabb_check[5] = child_aabb.zMax();
+        } else {
+            const auto aabb = block->getAABB();
+            aabb_check[0] = aabb.xMin();
+            aabb_check[1] = aabb.xMax();
+            aabb_check[2] = aabb.yMin();
+            aabb_check[3] = aabb.yMax();
+            aabb_check[4] = aabb.zMin();
+            aabb_check[5] = aabb.zMax();
+        }
+
+        nparticles = md_serialize_particles(aabb_check);
+
+        for(int i = 0; i < nparticles * 7; ++i) {
+            buffer << md_get_send_buffer_value(i);
+        }
+
+        *ptr = (uint_t) nparticles;
+    }
+
+    void deserializeImpl(IBlock *const, const BlockDataID&, mpi::RecvBuffer& buffer) {
+        uint_t nparticles;
+        buffer >> nparticles;
+
+        md_resize_recv_buffer_capacity((int) nparticles);
+
+        for(int i = 0; i < (int) nparticles * 7; ++i) {
+            double v;
+            buffer >> v;
+            md_set_recv_buffer_value(i, v);
+        }
+
+        md_deserialize_particles((int) nparticles);
+    }
+};
+
+} // namespace walberla
\ No newline at end of file
diff --git a/runtime/domain/block_forest.cpp b/runtime/domain/block_forest.cpp
index cc6ec7fca84610b3fbec214e1d4de8ebcddad4bd..af30587cedd075b475353630a33060c625fbfdbd 100644
--- a/runtime/domain/block_forest.cpp
+++ b/runtime/domain/block_forest.cpp
@@ -15,18 +15,15 @@
 #include "../pairs_common.hpp"
 #include "../devices/device.hpp"
 #include "regular_6d_stencil.hpp"
-#include "MDDataHandling.h"
+#include "ParticleDataHandling.h"
 
 namespace pairs {
 
-void BlockForest::updateNeighborhood(
-    std::shared_ptr<BlockForest> forest,
-    blockforest::InfoCollection& info,
-    real_t *ranks,
-    real_t *naabbs,
-    real_t *aabbs) {
-
+void BlockForest::updateNeighborhood() {
     auto me = mpi::MPIManager::instance()->rank();
+    this->nranks = 0;
+    this->total_aabbs = 0;
+
     for(auto& iblock: *forest) {
         auto block = static_cast<blockforest::Block *>(&iblock);
         auto& block_info = info[block->getId()];
@@ -56,7 +53,6 @@ void BlockForest::updateNeighborhood(
     vec_ranks.clear();
     vec_naabbs.clear();
     vec_aabbs.clear();
-    total_aabbs = 0;
 
     for(auto& nbh: neighborhood) {
         auto rank = nbh.first;
@@ -71,16 +67,14 @@ void BlockForest::updateNeighborhood(
             vec_aabbs.push_back(aabb.yMax());
             vec_aabbs.push_back(aabb.zMin());
             vec_aabbs.push_back(aabb.zMax());
-            total_aabbs++;
+            this->total_aabbs++;
         }
+
+        this->nranks++;
     }
+}
 
-    // TODO: delegate these next lines to functions (getRuntimeInt?)
-    *nranks = nranks;
-    *total_aabbs = total_aabbs;
 
-    ps->copyToDevice(aabbs);
-}
 
 void BlockForest::copyRuntimeArray(const std::string& name, void *dest, const int size) {
     void *src = name.compare('ranks') ? vec_ranks.data() :
@@ -91,8 +85,8 @@ void BlockForest::copyRuntimeArray(const std::string& name, void *dest, const in
                 name.compare('subdom') ? subdom;
 
     bool is_real = name.compare('aabbs') || name.compare('subdom');
-    int tsize = (is_real) ? sizeof(real_t) : sizeof(int);
-    std::memcpy(dest, src, size * tsize)
+    int tsize = is_real ? sizeof(real_t) : sizeof(int);
+    std::memcpy(dest, src, size * tsize);
 }
 
 /*
@@ -121,12 +115,7 @@ void BlockForest::copyRuntimeArray(const std::string& name, void *dest, const in
  }) as u32;
  */
 
-void BlockForest::updateWeights(
-    shared_ptr<BlockForest> forest,
-    blockforest::InfoCollection& info,
-    real_t *position,
-    int nparticles) {
-
+void BlockForest::updateWeights(real_t *position, int nparticles) {
     mpi::BufferSystem bs(mpi::MPIManager::instance()->comm(), 756);
 
     info.clear();
@@ -229,20 +218,30 @@ uint_t BlockForest::getInitialRefinementLevel(uint_t num_processes) {
     return refinementLevel;
 }
 
-void BlockForest::getBlockForestAABB(double (&rank_aabb)[6]) {
+void BlockForest::setBoundingBox() {
     auto aabb_union = forest->begin()->getAABB();
-
     for(auto& iblock: *forest) {
         auto block = static_cast<blockforest::Block *>(&iblock);
         aabb_union.merge(block->getAABB());
     }
 
-    rank_aabb[0] = aabb_union.xMin();
-    rank_aabb[1] = aabb_union.xMax();
-    rank_aabb[2] = aabb_union.yMin();
-    rank_aabb[3] = aabb_union.yMax();
-    rank_aabb[4] = aabb_union.zMin();
-    rank_aabb[5] = aabb_union.zMax();
+    subdom[0] = aabb_union.xMin();
+    subdom[1] = aabb_union.xMax();
+    subdom[2] = aabb_union.yMin();
+    subdom[3] = aabb_union.yMax();
+    subdom[4] = aabb_union.zMin();
+    subdom[5] = aabb_union.zMax();
+}
+
+void BlockForest::rebalance() {
+    if(balance_workload) {
+        this->updateWeights();
+        forest->refresh();
+    }
+
+    this->setBoundingBox();
+    this->updateWeights();
+    this->updateNeighborhood();
 }
 
 void BlockForest::initialize(int *argc, char ***argv) {
@@ -260,15 +259,81 @@ void BlockForest::initialize(int *argc, char ***argv) {
     auto block_config = use_load_balancing ? Vector3<uint_t>(1, 1, 1) : getBlockConfig(procs, gridsize[0], gridsize[1], gridsize[2]);
     auto ref_level = use_load_balancing ? getInitialRefinementLevel(procs) : 0;
 
-    auto forest = blockforest::createBlockForest(
+    forest = blockforest::createBlockForest(
         domain, block_config, Vector3<bool>(true, true, true), procs, ref_level);
 
-    auto is_within_domain = bind(isWithinBlockForest, _1, _2, _3, forest);
-    auto info = make_shared<blockforest::InfoCollection>();
-    getBlockForestAABB(forest, rank_aabb);
+    info = make_shared<blockforest::InfoCollection>();
+    this->setBoundingBox();
+
+    if(balance_workload) {
+        this->initializeWorkloadBalancer();
+    }
+}
+
+void BlockForest::initializeWorkloadBalancer() {
+    const std::string algorithm = "morton";
+    real_t baseWeight = 1.0;
+    real_t metisipc2redist = 1.0;
+    size_t regridMin = 10;
+    size_t regridMax = 100;
+    int maxBlocksPerProcess = 100;
+    string metisAlgorithm = "none";
+    string metisWeightsToUse = "none";
+    string metisEdgeSource = "none";
+
+    forest->recalculateBlockLevelsInRefresh(true);
+    forest->alwaysRebalanceInRefresh(true);
+    forest->reevaluateMinTargetLevelsAfterForcedRefinement(true);
+    forest->allowRefreshChangingDepth(true);
+
+    forest->allowMultipleRefreshCycles(false);
+    forest->checkForEarlyOutInRefresh(false);
+    forest->checkForLateOutInRefresh(false);
+    forest->setRefreshMinTargetLevelDeterminationFunction(pe::amr::MinMaxLevelDetermination(info, regridMin, regridMax));
+
+    for_each(algorithm.begin(), algorithm.end(), [](char& c) { c = (char) ::tolower(c); });
+
+    if(algorithm == "morton") {
+        forest->setRefreshPhantomBlockDataAssignmentFunction(pe::amr::WeightAssignmentFunctor(info, baseWeight));
+        forest->setRefreshPhantomBlockDataPackFunction(pe::amr::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor());
+        forest->setRefreshPhantomBlockDataUnpackFunction(pe::amr::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor());
+
+        auto prepFunc = blockforest::DynamicCurveBalance<pe::amr::WeightAssignmentFunctor::PhantomBlockWeight>(false, true, false);
+
+        prepFunc.setMaxBlocksPerProcess(maxBlocksPerProcess);
+        forest->setRefreshPhantomBlockMigrationPreparationFunction(prepFunc);
+    } else if(algorithm == "hilbert") {
+        forest->setRefreshPhantomBlockDataAssignmentFunction(pe::amr::WeightAssignmentFunctor(info, baseWeight));
+        forest->setRefreshPhantomBlockDataPackFunction(pe::amr::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor());
+        forest->setRefreshPhantomBlockDataUnpackFunction(pe::amr::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor());
+
+        auto prepFunc = blockforest::DynamicCurveBalance<pe::amr::WeightAssignmentFunctor::PhantomBlockWeight>(true, true, false);
+
+        prepFunc.setMaxBlocksPerProcess(maxBlocksPerProcess);
+        forest->setRefreshPhantomBlockMigrationPreparationFunction(prepFunc);
+    } else if(algorithm == "metis") {
+        forest->setRefreshPhantomBlockDataAssignmentFunction(pe::amr::MetisAssignmentFunctor(info, baseWeight));
+        forest->setRefreshPhantomBlockDataPackFunction(pe::amr::MetisAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor());
+        forest->setRefreshPhantomBlockDataUnpackFunction(pe::amr::MetisAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor());
+
+        auto alg = blockforest::DynamicParMetis::stringToAlgorithm(metisAlgorithm);
+        auto vWeight = blockforest::DynamicParMetis::stringToWeightsToUse(metisWeightsToUse);
+        auto eWeight = blockforest::DynamicParMetis::stringToEdgeSource(metisEdgeSource);
+        auto prepFunc = blockforest::DynamicParMetis(alg, vWeight, eWeight);
+
+        prepFunc.setipc2redist(metisipc2redist);
+        forest->setRefreshPhantomBlockMigrationPreparationFunction(prepFunc);
+    } else if(algorithm == "diffusive") {
+        forest->setRefreshPhantomBlockDataAssignmentFunction(pe::amr::WeightAssignmentFunctor(info, baseWeight));
+        forest->setRefreshPhantomBlockDataPackFunction(pe::amr::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor());
+        forest->setRefreshPhantomBlockDataUnpackFunction(pe::amr::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor());
+
+        auto prepFunc = blockforest::DynamicDiffusionBalance<pe::amr::WeightAssignmentFunctor::PhantomBlockWeight>(1, 1, false);
+
+        forest->setRefreshPhantomBlockMigrationPreparationFunction(prepFunc);
+    }
 
-    //subdom_min[d] = this->grid_min[d] + rank_length[d] * (real_t)myloc[d];
-    //subdom_max[d] = subdom_min[d] + rank_length[d];
+    forest->addBlockData(make_shared<ParticleDataHandling>(), "Interface");
 }
 
 void BlockForest::finalize() {
diff --git a/runtime/domain/block_forest.hpp b/runtime/domain/block_forest.hpp
index e5f4a68745798b2d8ec0a55909b53ac47f85401e..1a4034928bd2668ad997243f9c5e1eeca6968cac 100644
--- a/runtime/domain/block_forest.hpp
+++ b/runtime/domain/block_forest.hpp
@@ -12,6 +12,7 @@ namespace pairs {
 class BlockForest : public DomainPartitioner {
 private:
     std::shared_ptr<BlockForest> forest;
+    blockforest::InfoCollection info;
     std::map<uint_t, std::vector<math::AABB>> neighborhood;
     std::map<uint_t, std::vector<BlockID>> blocks_pushed;
     std::vector<int> ranks;
@@ -20,29 +21,38 @@ private:
     real_t *subdom_min;
     real_t *subdom_max;
     int world_size, rank, total_aabbs;
+    bool balance_workload;
 
 public:
     BlockForest(
         real_t xmin, real_t xmax, real_t ymin, real_t ymax, real_t zmin, real_t zmax) :
         DomainPartitioner(xmin, xmax, ymin, ymax, zmin, zmax) {
 
-        subdom_min = new real_t[ndims];
-        subdom_max = new real_t[ndims];
+        subdom = new real_t[ndims * 2];
     }
 
     ~BlockForest() {
-        delete[] subdom_min;
-        delete[] subdom_max;
+        delete[] subdom;
     }
 
-    void setConfig();
     void setBoundingBox();
     void initialize(int *argc, char ***argv);
     void finalize();
     int getWorldSize() const { return world_size; }
     int getRank() const { return rank; }
+    int getNumberOfNeighborRanks() { return this->nranks; }
+    int getNumberOfNeighborAABBs() { return this->naabbs; }
+
+    void initializeWorkloadBalancer();
+    void updateNeighborhood();
+    void updateWeights(real_t *position, int nparticles);
+    Vector3<uint_t> getBlockConfig(uint_t num_processes, uint_t nx, uint_t ny, uint_t nz);
+    uint_t getInitialRefinementLevel(uint_t num_processes);
+    void setBoundingBox();
+    void rebalance();
+
     int isWithinSubdomain(real_t x, real_t y, real_t z);
-    void fillArrays(int *neighbor_ranks, int *pbc, real_t *subdom);
+    void copyRuntimeArray(const std::string& name, void *dest, const int size);
     void communicateSizes(int dim, const int *send_sizes, int *recv_sizes);
     void communicateData(
         int dim, int elem_size,
diff --git a/runtime/domain/regular_6d_stencil.hpp b/runtime/domain/regular_6d_stencil.hpp
index 330af65a6ccb140cef8d283eac3ab183d0503c45..4aa95cd9cb20b7027f032442345124f3cd385d05 100644
--- a/runtime/domain/regular_6d_stencil.hpp
+++ b/runtime/domain/regular_6d_stencil.hpp
@@ -52,8 +52,12 @@ public:
     void setBoundingBox();
     void initialize(int *argc, char ***argv);
     void finalize();
+
     int getWorldSize() const { return world_size; }
     int getRank() const { return rank; }
+    int getNumberOfNeighborRanks() { return 6; }
+    int getNumberOfNeighborAABBs() { return 6; }
+
     int isWithinSubdomain(real_t x, real_t y, real_t z);
     void fillArrays(int *neighbor_ranks, int *pbc, real_t *subdom);
     void communicateSizes(int dim, const int *send_sizes, int *recv_sizes);
diff --git a/runtime/pairs.hpp b/runtime/pairs.hpp
index 94a49fa0b954fa28f1251c330ccfae87dab918a8..000e6c47383a2a6996de27d89a15a4bc8702b3b7 100644
--- a/runtime/pairs.hpp
+++ b/runtime/pairs.hpp
@@ -283,8 +283,9 @@ public:
         const real_t *send_buf, const int *contact_soffsets, const int *nsend_contact,
         real_t *recv_buf, int *contact_roffsets, int *nrecv_contact);
 
-    void PairsSimulation::copyRuntimeArray(
-        const std::string& name, const void *dest, const int size);
+    void copyRuntimeArray(const std::string& name, const void *dest, const int size);
+    int getNumberOfNeighborRanks() { return this->getDomainPartitioner()->getNumberOfNeighborRanks(); }
+    int getNumberOfNeighborAABBs() { return this->getDomainPartitioner()->getNumberOfNeighborAABBs(); }
 
     // Device functions
     void sync() { device_synchronize(); }
diff --git a/src/pairs/sim/domain_partitioning.py b/src/pairs/sim/domain_partitioning.py
index 47e91b360ea2d1ad0401352af4bcf24108d2a8df..aae6dfc060d45fb2bac129ca169fe9296fd64cd5 100644
--- a/src/pairs/sim/domain_partitioning.py
+++ b/src/pairs/sim/domain_partitioning.py
@@ -100,13 +100,27 @@ class BlockForest:
     def initialize(self):
         grid_array = [(self.sim.grid.min(d), self.sim.grid.max(d)) for d in range(self.sim.ndims())]
         Call_Void(self.sim, "pairs->initDomain", [param for delim in grid_array for param in delim])
-        # TODO: Check nranks_capacity and aabb_capacity and resize arrays if needed
-        Call_Void(self.sim, "pairs->copyRuntimeArray", ['ranks', self.ranks, self.nranks_capacity])
-        Call_Void(self.sim, "pairs->copyRuntimeArray", ['naabbs', self.naabbs, self.nranks_capacity])
-        Call_Void(self.sim, "pairs->copyRuntimeArray", ['rank_offsets', self.rank_offsets, self.nranks_capacity])
-        Call_Void(self.sim, "pairs->copyRuntimeArray", ['pbc', self.pbc, self.aabb_capacity * 3])
-        Call_Void(self.sim, "pairs->copyRuntimeArray", ['aabbs', self.aabbs, self.aabb_capacity * 6])
-        Call_Void(self.sim, "pairs->copyRuntimeArray", ['subdom', self.subdom, sim.ndims() * 2])
+
+        Assign(self.sim, self.nranks, Call_Int(self.sim, "pairs->getNumberOfNeighborRanks", []))
+        Assign(self.sim, self.naabbs, Call_Int(self.sim, "pairs->getNumberOfNeighborAABBs", []))
+
+        for _ in Filter(self.sim, self.nranks_capacity < self.nranks):
+            Assign(self.sim, self.nranks_capacity, self.nranks + 10)
+            self.ranks.realloc()
+            self.naabbs.realloc()
+            self.offsets.realloc()
+
+        for _ in Filter(self.sim, self.aabb_capacity < self.naabbs):
+            Assign(self.sim, self.aabb_capacity, self.naabbs + 20)
+            self.pbc.realloc()
+            self.aabbs.realloc()
+
+        Call_Void(self.sim, "pairs->copyRuntimeArray", ['ranks', self.ranks, self.nranks])
+        Call_Void(self.sim, "pairs->copyRuntimeArray", ['naabbs', self.naabbs, self.nranks])
+        Call_Void(self.sim, "pairs->copyRuntimeArray", ['rank_offsets', self.rank_offsets, self.nranks])
+        Call_Void(self.sim, "pairs->copyRuntimeArray", ['pbc', self.pbc, self.naabbs * 3])
+        Call_Void(self.sim, "pairs->copyRuntimeArray", ['aabbs', self.aabbs, self.naabbs * 6])
+        Call_Void(self.sim, "pairs->copyRuntimeArray", ['subdom', self.subdom, self.sim.ndims() * 2])
 
     def ghost_particles(self, step, position, offset=0.0):
         # Particles with one of the following flags are ignored