diff --git a/Makefile b/Makefile
index ba82842577390602a55904c2cb8c71a8863909cc..ca5aa90506fea55ecd3a520d413260f3246f0d54 100644
--- a/Makefile
+++ b/Makefile
@@ -18,11 +18,13 @@ lj_ns.cu:
 # Targets
 lj_cpu: lj_ns.cpp
 #	g++ -o lj_cpu lj_ns.cpp -DDEBUG
-	mpic++ -O3 -o lj_cpu lj_ns.cpp runtime/pairs.cpp runtime/devices/dummy.cpp -DDEBUG
+	mpic++ -O3 -o lj_cpu lj_ns.cpp runtime/pairs.cpp runtime/domain/regular_6d_stencil.cpp runtime/devices/dummy.cpp -DDEBUG
 
 lj_gpu: lj_ns.cu
-	nvcc -o lj_gpu lj_ns.cu
+	mpic++ -c -o pairs.o runtime/pairs.cpp -DDEBUG
+	mpic++ -c -o regular_6d_stencil.o runtime/domain/regular_6d_stencil.cpp -DDEBUG
+	nvcc -o lj_gpu runtime/devices/cuda.cu pairs.o regular_6d_stencil.o lj_ns.cu
 
 clean:
 	@echo "Cleaning..."
-	rm -rf build lj_cpu lj_gpu lj_ns.cpp lj_ns.cu dist pairs.egg-info functions functions.pdf
+	rm -rf build lj_cpu lj_gpu lj_ns.cpp lj_ns.cu dist pairs.egg-info functions functions.pdf pairs.o regular_6d_stencil.o
diff --git a/runtime/devices/cuda.cu b/runtime/devices/cuda.cu
index 50b35006f2bbf70be3879abd539eaa882e66f4c2..c6ecfb5f2d000a43f36e35b747411115faa573b8 100644
--- a/runtime/devices/cuda.cu
+++ b/runtime/devices/cuda.cu
@@ -1,8 +1,6 @@
 #include <cuda_runtime.h>
 #include <iostream>
 
-#pragma once
-
 #define CUDA_ASSERT(a) { pairs::cuda_assert((a), __FILE__, __LINE__); }
 
 namespace pairs {
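Note on the cuda.cu hunk: removing "#pragma once" is right, since it is an include guard and means nothing in a .cu translation unit that is compiled directly. The body of pairs::cuda_assert is not part of this diff; purely for orientation, here is a minimal sketch of what such a helper conventionally looks like. The implementation below is an assumption, not the project's actual code.

    #include <cuda_runtime.h>
    #include <cstdlib>
    #include <iostream>

    namespace pairs {

    // Sketch only: abort with file/line context on any CUDA runtime error,
    // matching the (cudaError_t, const char *, int) call in CUDA_ASSERT.
    inline void cuda_assert(cudaError_t err, const char *file, int line) {
        if(err != cudaSuccess) {
            std::cerr << file << ":" << line << ": " << cudaGetErrorString(err) << std::endl;
            std::exit(EXIT_FAILURE);
        }
    }

    }

    // Usage: CUDA_ASSERT(cudaMalloc(&ptr, nbytes));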
diff --git a/runtime/domain/domain_partitioning.hpp b/runtime/domain/domain_partitioning.hpp
index c4a231b3e14c5ab6ea911f1b830a348399f55677..e08e5eebba5576d6094205a837d69c606e4f7662 100644
--- a/runtime/domain/domain_partitioning.hpp
+++ b/runtime/domain/domain_partitioning.hpp
@@ -1,5 +1,3 @@
-#include <mpi.h>
-//---
 #include "../pairs_common.hpp"
 
 #pragma once
diff --git a/runtime/domain/regular_6d_stencil.cpp b/runtime/domain/regular_6d_stencil.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..659c3d397372f49d2cad4e5479ba0a3d7429473f
--- /dev/null
+++ b/runtime/domain/regular_6d_stencil.cpp
@@ -0,0 +1,157 @@
+#include <mpi.h>
+//---
+#include "../pairs_common.hpp"
+#include "regular_6d_stencil.hpp"
+
+namespace pairs {
+
+void Regular6DStencil::setConfig() {
+    PAIRS_ASSERT(ndims == 3);
+    real_t area[3]; // cross-section areas of the full domain: xy, xz, yz
+    real_t best_surf = 0.0; // seeded below with the full-domain surface, an upper bound that any candidate split beats
+    int d = 0;
+
+    for(int d1 = 0; d1 < ndims; d1++) {
+        nranks[d1] = 1;
+        for(int d2 = d1 + 1; d2 < ndims; d2++) {
+            area[d] = (this->grid_max[d1] - this->grid_min[d1]) * (this->grid_max[d2] - this->grid_min[d2]);
+            best_surf += 2.0 * area[d];
+            d++;
+        }
+    }
+
+    for(int i = 1; i <= world_size; i++) { // <= (not <) so splits that put all ranks on one axis are tried
+        if(world_size % i == 0) {
+            const int rem_yz = world_size / i;
+            for(int j = 1; j <= rem_yz; j++) { // likewise, <= admits k == 1
+                if(rem_yz % j == 0) {
+                    const int k = rem_yz / j;
+                    const real_t surf = (area[0] / i / j) + (area[1] / i / k) + (area[2] / j / k); // half-surface of one subdomain
+                    if(surf < best_surf) {
+                        nranks[0] = i;
+                        nranks[1] = j;
+                        nranks[2] = k;
+                        best_surf = surf;
+                    }
+                }
+            }
+        }
+    }
+}
+
+void Regular6DStencil::setBoundingBox() {
+    MPI_Comm cartesian;
+    int *myloc = new int[ndims];
+    int *periods = new int[ndims];
+    real_t *rank_length = new real_t[ndims];
+    int reorder = 0;
+
+    for(int d = 0; d < ndims; d++) {
+        periods[d] = 1; // every dimension is treated as periodic
+        rank_length[d] = (this->grid_max[d] - this->grid_min[d]) / (real_t) nranks[d];
+    }
+
+    MPI_Cart_create(MPI_COMM_WORLD, ndims, nranks, periods, reorder, &cartesian);
+    MPI_Cart_get(cartesian, ndims, nranks, periods, myloc);
+    for(int d = 0; d < ndims; d++) {
+        MPI_Cart_shift(cartesian, d, 1, &(prev[d]), &(next[d]));
+        pbc_prev[d] = (myloc[d] == 0) ? 1 : 0;              // periodic-image shift flag: rank sits on the lower boundary
+        pbc_next[d] = (myloc[d] == nranks[d] - 1) ? -1 : 0; // periodic-image shift flag: rank sits on the upper boundary
+        subdom_min[d] = this->grid_min[d] + rank_length[d] * (real_t)myloc[d];
+        subdom_max[d] = subdom_min[d] + rank_length[d];
+    }
+
+    delete[] myloc;
+    delete[] periods;
+    delete[] rank_length;
+    MPI_Comm_free(&cartesian);
+}
+
+void Regular6DStencil::initialize(int *argc, char ***argv) {
+    MPI_Init(argc, argv);
+    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    this->setConfig();
+    this->setBoundingBox();
+}
+
+void Regular6DStencil::finalize() {
+    MPI_Finalize();
+}
+
+int Regular6DStencil::isWithinSubdomain(real_t x, real_t y, real_t z) { // upper bounds shrunk by SMALL against float round-off at the faces
+    return x >= subdom_min[0] && x < subdom_max[0] - SMALL &&
+           y >= subdom_min[1] && y < subdom_max[1] - SMALL &&
+           z >= subdom_min[2] && z < subdom_max[2] - SMALL;
+}
+
+void Regular6DStencil::fillArrays(int *neighbor_ranks, int *pbc, real_t *subdom) {
+    for(int d = 0; d < ndims; d++) {
+        neighbor_ranks[d * 2 + 0] = prev[d];
+        neighbor_ranks[d * 2 + 1] = next[d];
+        pbc[d * 2 + 0] = pbc_prev[d];
+        pbc[d * 2 + 1] = pbc_next[d];
+        subdom[d * 2 + 0] = subdom_min[d];
+        subdom[d * 2 + 1] = subdom_max[d];
+    }
+}
+
+void Regular6DStencil::communicateSizes(int dim, const int *send_sizes, int *recv_sizes) {
+    std::cout << "communicateSizes" << std::endl;
+
+    if(prev[dim] != rank) {
+        MPI_Send(&send_sizes[dim * 2 + 0], 1, MPI_INT, prev[dim], 0, MPI_COMM_WORLD);
+        MPI_Recv(&recv_sizes[dim * 2 + 0], 1, MPI_INT, next[dim], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    } else {
+        recv_sizes[dim * 2 + 0] = send_sizes[dim * 2 + 0];
+    }
+
+    if(next[dim] != rank) {
+        MPI_Send(&send_sizes[dim * 2 + 1], 1, MPI_INT, next[dim], 0, MPI_COMM_WORLD);
+        MPI_Recv(&recv_sizes[dim * 2 + 1], 1, MPI_INT, prev[dim], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    } else {
+        recv_sizes[dim * 2 + 1] = send_sizes[dim * 2 + 1];
+    }
+}
+
+void Regular6DStencil::communicateData(
+    int dim, int elem_size,
+    const real_t *send_buf, const int *send_offsets, const int *nsend,
+    real_t *recv_buf, const int *recv_offsets, const int *nrecv) {
+    std::cout << "communicateData" << std::endl;
+
+    const real_t *send_prev = &send_buf[send_offsets[dim * 2 + 0] * elem_size];
+    const real_t *send_next = &send_buf[send_offsets[dim * 2 + 1] * elem_size];
+    real_t *recv_prev = &recv_buf[recv_offsets[dim * 2 + 0] * elem_size];
+    real_t *recv_next = &recv_buf[recv_offsets[dim * 2 + 1] * elem_size];
+
+    if(prev[dim] != rank) {
+        std::cout << rank << ": send " << nsend[dim * 2 + 0] << " elems to " << prev[dim] << ", recv " << nrecv[dim * 2 + 0] << " elems from " << next[dim] << std::endl;
+
+        MPI_Sendrecv(
+            send_prev, nsend[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0,
+            recv_prev, nrecv[dim * 2 + 0] * elem_size, MPI_DOUBLE, next[dim], 0,
+            MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+    } else {
+        for(int i = 0; i < nsend[dim * 2 + 0] * elem_size; i++) {
+            recv_prev[i] = send_prev[i];
+        }
+    }
+
+    if(next[dim] != rank) {
+        std::cout << rank << ": send " << nsend[dim * 2 + 1] << " elems to " << next[dim] << ", recv " << nrecv[dim * 2 + 1] << " elems from " << prev[dim] << std::endl;
+
+        MPI_Sendrecv(
+            send_next, nsend[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0,
+            recv_next, nrecv[dim * 2 + 1] * elem_size, MPI_DOUBLE, prev[dim], 0,
+            MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+    } else {
+        for(int i = 0; i < nsend[dim * 2 + 1] * elem_size; i++) {
+            recv_next[i] = send_next[i];
+        }
+    }
+}
+
+} // namespace pairs
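The runtime normally drives Regular6DStencil from generated code, but the new translation unit can also be exercised directly. A minimal driver sketch follows, assuming a constructor of the form Regular6DStencil(xmin, xmax, ymin, ymax, zmin, zmax) and real_t being double; neither is shown in this diff, so both are assumptions.

    // Hypothetical standalone driver for the stencil partitioner.
    #include "runtime/domain/regular_6d_stencil.hpp"
    #include <iostream>

    int main(int argc, char **argv) {
        // Assumed constructor: a 10 x 10 x 10 box anchored at the origin.
        pairs::Regular6DStencil dom(0.0, 10.0, 0.0, 10.0, 0.0, 10.0);

        dom.initialize(&argc, &argv); // MPI_Init, rank-grid factorization, subdomain bounds

        int neighbor_ranks[6], pbc[6];
        double subdom[6]; // assuming real_t == double
        dom.fillArrays(neighbor_ranks, pbc, subdom); // flattened prev/next data, two slots per dimension

        std::cout << "rank " << dom.getRank() << " owns x in ["
                  << subdom[0] << ", " << subdom[1] << ")" << std::endl;

        dom.finalize(); // MPI_Finalize
        return 0;
    }

Built with mpic++ and launched under mpirun, each rank reports the x-extent of its subdomain.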
diff --git a/runtime/domain/regular_6d_stencil.hpp b/runtime/domain/regular_6d_stencil.hpp
index 631ded149ca38869db7f9c797dbd61cc6aa80075..1c12ff45de9a2a1a69b1067cdc42d4c2e995c338 100644
--- a/runtime/domain/regular_6d_stencil.hpp
+++ b/runtime/domain/regular_6d_stencil.hpp
@@ -3,7 +3,7 @@
 
 #pragma once
 
-#define SMALL   0.00001
+#define SMALL 0.00001
 
 namespace pairs {
 
@@ -41,160 +41,19 @@ public:
         delete[] subdom_max;
     }
 
-    void setConfig() {
-        PAIRS_ASSERT(ndims == 3);
-        real_t area[3];
-        real_t best_surf = 0.0;
-        int d = 0;
-
-        for(int d1 = 0; d1 < ndims; d1++) {
-            nranks[d1] = 1;
-            for(int d2 = d1 + 1; d2 < ndims; d2++) {
-                area[d] = (this->grid_max[d1] - this->grid_min[d1]) * (this->grid_max[d2] - this->grid_min[d2]);
-                best_surf += 2.0 * area[d];
-                d++;
-            }
-        }
-
-        for(int i = 1; i < world_size; i++) {
-            if(world_size % i == 0) {
-                const int rem_yz = world_size / i;
-                for(int j = 1; j < rem_yz; j++) {
-                    if(rem_yz % j == 0) {
-                        const int k = rem_yz / j;
-                        const real_t surf = (area[0] / i / j) + (area[1] / i / k) + (area[2] / j / k);
-                        if(surf < best_surf) {
-                            nranks[0] = i;
-                            nranks[1] = j;
-                            nranks[2] = k;
-                            best_surf = surf;
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    void setBoundingBox() {
-        MPI_Comm cartesian;
-        int *myloc = new int[ndims];
-        int *periods = new int[ndims];
-        real_t *rank_length = new real_t[ndims];
-        int reorder = 0;
-
-        for(int d = 0; d < ndims; d++) {
-            periods[d] = 1;
-            rank_length[d] = (this->grid_max[d] - this->grid_min[d]) / (real_t) nranks[d];
-        }
-
-        MPI_Cart_create(MPI_COMM_WORLD, ndims, nranks, periods, reorder, &cartesian);
-        MPI_Cart_get(cartesian, ndims, nranks, periods, myloc);
-        for(int d = 0; d < ndims; d++) {
-            MPI_Cart_shift(cartesian, d, 1, &(prev[d]), &(next[d]));
-            pbc_prev[d] = (myloc[d] == 0) ? 1 : 0;
-            pbc_next[d] = (myloc[d] == nranks[d] - 1) ? -1 : 0;
-            subdom_min[d] = this->grid_min[d] + rank_length[d] * (real_t)myloc[d];
-            subdom_max[d] = subdom_min[d] + rank_length[d];
-        }
-
-        delete[] myloc;
-        delete[] periods;
-        delete[] rank_length;
-        MPI_Comm_free(&cartesian);
-    }
-
-    void initialize(int *argc, char ***argv) {
-        MPI_Init(argc, argv);
-        MPI_Comm_size(MPI_COMM_WORLD, &world_size);
-        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-        this->setConfig();
-        this->setBoundingBox();
-    }
-
-    void finalize() {
-        MPI_Finalize();
-    }
-
+    void setConfig();
+    void setBoundingBox();
+    void initialize(int *argc, char ***argv);
+    void finalize();
     int getWorldSize() const { return world_size; }
     int getRank() const { return rank; }
-    int isWithinSubdomain(real_t x, real_t y, real_t z) {
-        return x >= subdom_min[0] && x < subdom_max[0] - SMALL &&
-               y >= subdom_min[1] && y < subdom_max[1] - SMALL &&
-               z >= subdom_min[2] && z < subdom_max[2] - SMALL;
-    }
-
-    void fillArrays(int *neighbor_ranks, int *pbc, real_t *subdom) {
-        for(int d = 0; d < ndims; d++) {
-            neighbor_ranks[d * 2 + 0] = prev[d];
-            neighbor_ranks[d * 2 + 1] = next[d];
-            pbc[d * 2 + 0] = pbc_prev[d];
-            pbc[d * 2 + 1] = pbc_next[d];
-            subdom[d * 2 + 0] = subdom_min[d];
-            subdom[d * 2 + 1] = subdom_max[d];
-        }
-    }
-
-    void communicateSizes(int dim, const int *send_sizes, int *recv_sizes) {
-        std::cout << "communicateSizes" << std::endl;
-
-        if(prev[dim] != rank) {
-            MPI_Send(&send_sizes[dim * 2 + 0], 1, MPI_INT, prev[dim], 0, MPI_COMM_WORLD);
-            MPI_Recv(&recv_sizes[dim * 2 + 0], 1, MPI_INT, next[dim], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-        } else {
-            recv_sizes[dim * 2 + 0] = send_sizes[dim * 2 + 0];
-        }
-
-        if(next[dim] != rank) {
-            MPI_Send(&send_sizes[dim * 2 + 1], 1, MPI_INT, next[dim], 0, MPI_COMM_WORLD);
-            MPI_Recv(&recv_sizes[dim * 2 + 1], 1, MPI_INT, prev[dim], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-        } else {
-            recv_sizes[dim * 2 + 1] = send_sizes[dim * 2 + 1];
-        }
-    }
-
+    int isWithinSubdomain(real_t x, real_t y, real_t z);
+    void fillArrays(int *neighbor_ranks, int *pbc, real_t *subdom);
+    void communicateSizes(int dim, const int *send_sizes, int *recv_sizes);
     void communicateData(
         int dim, int elem_size,
         const real_t *send_buf, const int *send_offsets, const int *nsend,
-        real_t *recv_buf, const int *recv_offsets, const int *nrecv) {
-        std::cout << "communicateData" << std::endl;
-
-        const real_t *send_prev = &send_buf[send_offsets[dim * 2 + 0] * elem_size];
-        const real_t *send_next = &send_buf[send_offsets[dim * 2 + 1] * elem_size];
-        real_t *recv_prev = &recv_buf[recv_offsets[dim * 2 + 0] * elem_size];
-        real_t *recv_next = &recv_buf[recv_offsets[dim * 2 + 1] * elem_size];
-
-        if(prev[dim] != rank) {
-            std::cout << rank << ": send " << nsend[dim * 2 + 0] << " elems to " << prev[dim] << ", recv " << nrecv[dim * 2 + 0] << " elems from " << next[dim] << std::endl;
-            //MPI_Send(send_prev, nsend[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0, MPI_COMM_WORLD);
-            //MPI_Recv(recv_prev, nrecv[dim * 2 + 0] * elem_size, MPI_DOUBLE, next[dim], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-
-            MPI_Sendrecv(
-                send_prev, nsend[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0,
-                recv_prev, nrecv[dim * 2 + 0] * elem_size, MPI_DOUBLE, next[dim], 0,
-                MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-
-        } else {
-            for(int i = 0; i < nsend[dim * 2 + 0] * elem_size; i++) {
-                recv_prev[i] = send_prev[i];
-            }
-        }
-
-        if(next[dim] != rank) {
-            std::cout << rank << ": send " << nsend[dim * 2 + 1] << " elems to " << next[dim] << ", recv " << nrecv[dim * 2 + 1] << " elems from " << prev[dim] << std::endl;
-            //MPI_Send(send_next, nsend[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0, MPI_COMM_WORLD);
-            //MPI_Recv(recv_next, nrecv[dim * 2 + 1] * elem_size, MPI_DOUBLE, prev[dim], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-
-            MPI_Sendrecv(
-                send_next, nsend[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0,
-                recv_next, nrecv[dim * 2 + 1] * elem_size, MPI_DOUBLE, prev[dim], 0,
-                MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-
-        } else {
-            for(int i = 0; i < nsend[dim * 2 + 1] * elem_size; i++) {
-                recv_next[i] = send_next[i];
-            }
-        }
-    }
+        real_t *recv_buf, const int *recv_offsets, const int *nrecv);
 };
 
 }
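Two conventions in the refactored interface are worth spelling out. First, every flattened array uses slot dim * 2 + 0 for the negative (prev) neighbor and slot dim * 2 + 1 for the positive (next) neighbor, and traffic is cross-paired: what is sent toward prev arrives from next, and vice versa. Second, communicateData uses a single MPI_Sendrecv per direction instead of the blocking Send/Recv pair still visible in the removed header code, which could deadlock once messages exceed MPI's eager buffering; communicateSizes keeps the plain pair, which is safe in practice because each message is a single int. A minimal caller sketch under these conventions, continuing the hypothetical driver above (the fill step and variable names are illustrative):

    // Hypothetical per-dimension halo-size exchange.
    int send_sizes[6] = {0};
    int recv_sizes[6] = {0};

    for(int dim = 0; dim < 3; dim++) {
        // ... fill send_sizes[dim * 2 + 0] (toward prev) and
        //     send_sizes[dim * 2 + 1] (toward next) ...
        dom.communicateSizes(dim, send_sizes, recv_sizes);
        // recv_sizes[dim * 2 + 0] now holds the count arriving from next[dim],
        // recv_sizes[dim * 2 + 1] the count arriving from prev[dim].
    }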