diff --git a/runtime/domain/regular_6d_stencil.hpp b/runtime/domain/regular_6d_stencil.hpp
index ddff6e865a276a29c6cc9c071bd922395b4a114d..0658191675d8118015e506f56120f0a5b8b969fe 100644
--- a/runtime/domain/regular_6d_stencil.hpp
+++ b/runtime/domain/regular_6d_stencil.hpp
@@ -26,24 +26,28 @@ public:
         static_assert(ndims == 3, "setConfig() only implemented for three dimensions!");
         real_t area[ndims];
         real_t best_surf = 0.0;
-
-        for(int d = 0; d < ndims; d++) {
-            this->nranks[d] = 1;
-            area[d] = (this->grid_max[d] - this->grid_min[d]) * (this->grid_max[d] - this->grid_min[d]);
-            best_surf += 2.0 * area[d];
+        int d = 0;
+
+        for(int d1 = 0; d1 < ndims; d1++) {
+            nranks[d1] = 1;
+            for(int d2 = d1 + 1; d2 < ndims; d2++) {
+                area[d] = (this->grid_max[d1] - this->grid_min[d1]) * (this->grid_max[d2] - this->grid_min[d2]);
+                best_surf += 2.0 * area[d];
+                d++;
+            }
         }
 
-        for(int i = 1; i < this->world_size; i++) {
-            if(this->world_size % i == 0) {
-                const int rem_yz = this->world_size / i;
-                for(int j = 0; j < rem_yz; j++) {
+        for(int i = 1; i < world_size; i++) {
+            if(world_size % i == 0) {
+                const int rem_yz = world_size / i;
+                for(int j = 1; j < rem_yz; j++) {
                     if(rem_yz % j == 0) {
                         const int k = rem_yz / j;
                         const real_t surf = (area[0] / i / j) + (area[1] / i / k) + (area[2] / j / k);
                         if(surf < best_surf) {
-                            this->nranks[0] = i;
-                            this->nranks[1] = j;
-                            this->nranks[2] = k;
+                            nranks[0] = i;
+                            nranks[1] = j;
+                            nranks[2] = k;
                             best_surf = surf;
                         }
                     }
@@ -61,17 +65,17 @@ public:
 
         for(int d = 0; d < ndims; d++) {
             periods[d] = 1;
-            rank_length[d] = (this->grid_max[d] - this->grid_min[d]) / (real_t)this->nranks[d];
+            rank_length[d] = (this->grid_max[d] - this->grid_min[d]) / (real_t) nranks[d];
         }
 
-        MPI_Cart_create(MPI_COMM_WORLD, ndims, this->nranks, periods, reorder, &cartesian);
-        MPI_Cart_get(cartesian, ndims, this->nranks, periods, myloc);
+        MPI_Cart_create(MPI_COMM_WORLD, ndims, nranks, periods, reorder, &cartesian);
+        MPI_Cart_get(cartesian, ndims, nranks, periods, myloc);
         for(int d = 0; d < ndims; d++) {
-            MPI_Cart_shift(cartesian, d, 1, &(this->prev[d]), &(this->next[d]));
-            this->pbc_prev[d] = (myloc[d] == 0) ? 1 : 0;
-            this->pbc_next[d] = (myloc[d] == this->nranks[d] - 1) ? -1 : 0;
-            this->subdom_min[d] = this->grid_min[d] + rank_length[d] * (real_t)myloc[d];
-            this->subdom_max[d] = this->subdom_min[d] + rank_length[d];
+            MPI_Cart_shift(cartesian, d, 1, &(prev[d]), &(next[d]));
+            pbc_prev[d] = (myloc[d] == 0) ? 1 : 0;
+            pbc_next[d] = (myloc[d] == nranks[d] - 1) ? -1 : 0;
+            subdom_min[d] = this->grid_min[d] + rank_length[d] * (real_t)myloc[d];
+            subdom_max[d] = subdom_min[d] + rank_length[d];
         }
 
         MPI_Comm_free(&cartesian);
@@ -89,18 +93,28 @@ public:
         MPI_Finalize();
     }
 
+    int getWorldSize() const { return world_size; }
+    int getRank() const { return rank; }
+    int isWithinSubdomain(real_t x, real_t y, real_t z) {
+        return x > subdom_min[0] && x < subdom_max[0] &&
+               y > subdom_min[1] && y < subdom_max[1] &&
+               z > subdom_min[2] && z < subdom_max[2];
+    }
+
     void fillArrays(int neighbor_ranks[], int pbc[], real_t subdom[]) {
         for(int d = 0; d < ndims; d++) {
-            neighbor_ranks[d * 2 + 0] = this->prev[d];
-            neighbor_ranks[d * 2 + 1] = this->next[d];
-            pbc[d * 2 + 0] = this->pbc_prev[d];
-            pbc[d * 2 + 1] = this->pbc_next[d];
-            subdom[d * 2 + 0] = this->subdom_min[d];
-            subdom[d * 2 + 1] = this->subdom_max[d];
+            neighbor_ranks[d * 2 + 0] = prev[d];
+            neighbor_ranks[d * 2 + 1] = next[d];
+            pbc[d * 2 + 0] = pbc_prev[d];
+            pbc[d * 2 + 1] = pbc_next[d];
+            subdom[d * 2 + 0] = subdom_min[d];
+            subdom[d * 2 + 1] = subdom_max[d];
         }
     }
 
     void communicateSizes(int dim, const int *send_sizes, int *recv_sizes) {
+        std::cout << "communicateSizes" << std::endl;
+
         if(prev[dim] != rank) {
             MPI_Send(&send_sizes[dim * 2 + 0], 1, MPI_INT, prev[dim], 0, MPI_COMM_WORLD);
             MPI_Recv(&recv_sizes[dim * 2 + 0], 1, MPI_INT, next[dim], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
@@ -120,6 +134,7 @@ public:
         int dim, int elem_size,
         const real_t *send_buf, const int *send_offsets, const int *nsend,
         real_t *recv_buf, const int *recv_offsets, const int *nrecv) {
+        std::cout << "communicateData" << std::endl;
 
         const real_t *send_prev = &send_buf[send_offsets[dim * 2 + 0] * elem_size];
         const real_t *send_next = &send_buf[send_offsets[dim * 2 + 1] * elem_size];
@@ -127,8 +142,15 @@ public:
         real_t *recv_next = &recv_buf[recv_offsets[dim * 2 + 1] * elem_size];
 
         if(prev[dim] != rank) {
-            MPI_Send(send_prev, nsend[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0, MPI_COMM_WORLD);
-            MPI_Recv(recv_prev, nrecv[dim * 2 + 0] * elem_size, MPI_DOUBLE, next[dim], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            std::cout << rank << ": send " << nsend[dim * 2 + 0] << " elems to " << prev[dim] << ", recv " << nrecv[dim * 2 + 0] << " elems from " << next[dim] << std::endl;
+            //MPI_Send(send_prev, nsend[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0, MPI_COMM_WORLD);
+            //MPI_Recv(recv_prev, nrecv[dim * 2 + 0] * elem_size, MPI_DOUBLE, next[dim], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+            MPI_Sendrecv(
+                send_prev, nsend[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0,
+                recv_prev, nrecv[dim * 2 + 0] * elem_size, MPI_DOUBLE, next[dim], 0,
+                MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
         } else {
             for(int i = 0; i < nsend[dim * 2 + 0] * elem_size; i++) {
                 recv_prev[i] = send_prev[i];
@@ -136,8 +158,15 @@ public:
         }
 
         if(next[dim] != rank) {
-            MPI_Send(send_next, nsend[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0, MPI_COMM_WORLD);
-            MPI_Recv(recv_next, nrecv[dim * 2 + 1] * elem_size, MPI_DOUBLE, prev[dim], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            std::cout << rank << ": send " << nsend[dim * 2 + 1] << " elems to " << next[dim] << ", recv " << nrecv[dim * 2 + 1] << " elems from " << prev[dim] << std::endl;
+            //MPI_Send(send_next, nsend[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0, MPI_COMM_WORLD);
+            //MPI_Recv(recv_next, nrecv[dim * 2 + 1] * elem_size, MPI_DOUBLE, prev[dim], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+            MPI_Sendrecv(
+                send_next, nsend[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0,
+                recv_next, nrecv[dim * 2 + 1] * elem_size, MPI_DOUBLE, prev[dim], 0,
+                MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
         } else {
             for(int i = 0; i < nsend[dim * 2 + 1] * elem_size; i++) {
                 recv_next[i] = send_next[i];
diff --git a/runtime/pairs.hpp b/runtime/pairs.hpp
index c66103a94b1a5b195ba66a0781c3f0cebb2027bb..3c37795dc327780f1b43c0a6366d8d52fc10c28f 100644
--- a/runtime/pairs.hpp
+++ b/runtime/pairs.hpp
@@ -520,7 +520,7 @@ public:
 
     void communicateSizes(int dim, const int *send_sizes, int *recv_sizes) {
         this->getDomainPartitioner()->communicateSizes(dim, send_sizes, recv_sizes);
-        PAIRS_DEBUG("send_sizes=[%d, %d], recv_sizes=[%d, %d]\n", send_sizes[dim * 2 + 0], recv_sizes[dim * 2 + 1], recv_sizes[dim * 2 + 0], recv_sizes[dim * 2 + 1]);
+        PAIRS_DEBUG("send_sizes=[%d, %d], recv_sizes=[%d, %d]\n", send_sizes[dim * 2 + 0], send_sizes[dim * 2 + 1], recv_sizes[dim * 2 + 0], recv_sizes[dim * 2 + 1]);
     }
 
     void communicateData(
@@ -530,6 +530,8 @@ public:
 
         this->getDomainPartitioner()->communicateData(dim, elem_size, send_buf, send_offsets, nsend, recv_buf, recv_offsets, nrecv);
 
+        /*
+        // Debug messages
         const int elems_to_print = 5;
 
         // Send buffer debug
@@ -579,6 +581,7 @@ public:
 
             PAIRS_DEBUG("\b]\n");
         }
+        */
     }
 
     void fillCommunicationArrays(int neighbor_ranks[], int pbc[], real_t subdom[]) {
diff --git a/runtime/read_from_file.hpp b/runtime/read_from_file.hpp
index 2404e9bb22105c1825c15efa838217d1f052fa4e..7a681dc0b564bd005a357e6b7a6694155a015990 100644
--- a/runtime/read_from_file.hpp
+++ b/runtime/read_from_file.hpp
@@ -10,53 +10,75 @@
 namespace pairs {
 
 template<int ndims>
-size_t read_particle_data(PairsSimulation<ndims> *ps, const char *filename, double *grid_buffer, const property_t properties[], size_t nprops) {
+void read_grid_data(PairsSimulation<ndims> *ps, const char *filename, double *grid_buffer) {
+    std::ifstream in_file(filename, std::ifstream::in);
+    std::string line;
+
+    if(in_file.is_open()) {
+        std::getline(in_file, line);
+        std::stringstream line_stream(line);
+        std::string in0;
+        int i = 0;
+
+        while(std::getline(line_stream, in0, ',')) {
+            PAIRS_ASSERT(i < ndims * 2);
+            grid_buffer[i] = std::stod(in0);
+            i++;
+        }
+
+        in_file.close();
+    }
+}
+
+template<int ndims>
+size_t read_particle_data(PairsSimulation<ndims> *ps, const char *filename, const property_t properties[], size_t nprops) {
     std::ifstream in_file(filename, std::ifstream::in);
     std::string line;
     size_t n = 0;
-    int read_grid_data = 0;
 
     if(in_file.is_open()) {
+        std::getline(in_file, line);
         while(std::getline(in_file, line)) {
             std::stringstream line_stream(line);
             std::string in0;
+            int within_domain = 1;
             int i = 0;
 
             while(std::getline(line_stream, in0, ',')) {
-                if(!read_grid_data) {
-                    PAIRS_ASSERT(i < ndims * 2);
-                    grid_buffer[i] = std::stod(in0);
-                } else {
-                    PAIRS_ASSERT(i < nprops);
-                    property_t p_id = properties[i];
-                    auto prop = ps->getProperty(p_id);
-                    auto prop_type = prop.getType();
-
-                    if(prop_type == Prop_Vector) {
-                        auto vector_ptr = ps->getAsVectorProperty(prop);
-                        std::string in1, in2;
-                        std::getline(line_stream, in1, ',');
-                        std::getline(line_stream, in2, ',');
-                        vector_ptr(n, 0) = std::stod(in0);
-                        vector_ptr(n, 1) = std::stod(in1);
-                        vector_ptr(n, 2) = std::stod(in2);
-                    } else if(prop_type == Prop_Integer) {
-                        auto int_ptr = ps->getAsIntegerProperty(prop);
-                        int_ptr(n) = std::stoi(in0);
-                    } else if(prop_type == Prop_Float) {
-                        auto float_ptr = ps->getAsFloatProperty(prop);
-                        float_ptr(n) = std::stod(in0);
-                    } else {
-                        std::cerr << "read_particle_data(): Invalid property type!" << std::endl;
-                        return 0;
+                property_t p_id = properties[i];
+                auto prop = ps->getProperty(p_id);
+                auto prop_type = prop.getType();
+
+                if(prop_type == Prop_Vector) {
+                    auto vector_ptr = ps->getAsVectorProperty(prop);
+                    std::string in1, in2;
+                    std::getline(line_stream, in1, ',');
+                    std::getline(line_stream, in2, ',');
+                    real_t x = std::stod(in0);
+                    real_t y = std::stod(in1);
+                    real_t z = std::stod(in2);
+                    vector_ptr(n, 0) = x;
+                    vector_ptr(n, 1) = y;
+                    vector_ptr(n, 2) = z;
+
+                    if(prop.getName() == "position") {
+                        within_domain = ps->getDomainPartitioner()->isWithinSubdomain(x, y, z);
                     }
+                } else if(prop_type == Prop_Integer) {
+                    auto int_ptr = ps->getAsIntegerProperty(prop);
+                    int_ptr(n) = std::stoi(in0);
+                } else if(prop_type == Prop_Float) {
+                    auto float_ptr = ps->getAsFloatProperty(prop);
+                    float_ptr(n) = std::stod(in0);
+                } else {
+                    std::cerr << "read_particle_data(): Invalid property type!" << std::endl;
+                    return 0;
                 }
 
                 i++;
             }
 
-            n += (read_grid_data) ? 1 : 0;
-            read_grid_data = 1;
+            n += (within_domain) ? 1 : 0;
         }
 
         in_file.close();
diff --git a/runtime/vtk.hpp b/runtime/vtk.hpp
index 894fc119a6b57444ab2a076c66069f15643be788..eb10bce12a4c7902078629062371323c3d12200e 100644
--- a/runtime/vtk.hpp
+++ b/runtime/vtk.hpp
@@ -11,12 +11,18 @@ namespace pairs {
 template <int ndims>
 void vtk_write_data(PairsSimulation<ndims> *ps, const char *filename, int start, int end, int timestep) {
     std::string output_filename(filename);
-    std::ostringstream filename_oss;
-    filename_oss << filename << "_" << timestep << ".vtk";
-    std::ofstream out_file(filename_oss.str());
     auto masses = ps->getAsFloatProperty(ps->getPropertyByName("mass"));
     auto positions = ps->getAsVectorProperty(ps->getPropertyByName("position"));
     const int n = end - start;
+    std::ostringstream filename_oss;
+
+    filename_oss << filename << "_";
+    if(ps->getDomainPartitioner()->getWorldSize() > 1) {
+        filename_oss << "r" << ps->getDomainPartitioner()->getRank() << "_";
+    }
+
+    filename_oss << timestep << ".vtk";
+    std::ofstream out_file(filename_oss.str());
 
     ps->copyPropertyToHost(masses);
     ps->copyPropertyToHost(positions);