diff --git a/CMakeLists.txt b/CMakeLists.txt
index 67d70743c918a04b7b17af09d987599ab3d5c67e..ce1ea95414ae5469c6a5a92e1a4be74f4bd699d7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,9 +1,5 @@
 cmake_minimum_required(VERSION 3.8.2 FATAL_ERROR)
-
 project(pairs CXX)
-#enable_testing()
-
-#find_package(Python COMPONENTS Interpreter Development)
 
 set(TESTCASE ${TESTCASE} CACHE STRING "Select the testcase from the following: md, dem")
 
@@ -20,11 +16,23 @@ endif()
 string(TOLOWER "${TESTCASE}" TESTCASE)
 message(STATUS "Selected testcase: ${TESTCASE}")
 
+option(DEBUG "DEBUG" ON)
 option(USE_WALBERLA "USE_WALBERLA" ON)
 option(USE_MPI "USE_MPI" ON)
 option(COMPILE_CUDA "COMPILE_CUDA" ON)
 option(ENABLE_GPU_DIRECT "ENABLE_GPU_DIRECT" ON)
 
+if(DEBUG)
+    SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDEBUG ")
+endif()
+
+if(USE_WALBERLA)
+    SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_WALBERLA_LOAD_BALANCING ")
+    find_package(waLBerla REQUIRED)
+    add_subdirectory(${walberla_SOURCE_DIR} ${walberla_BINARY_DIR} EXCLUDE_FROM_ALL)
+    waLBerla_import()
+endif()
+
 set(CPU_SRC "${TESTCASE}.cpp")
 set(GPU_SRC "${TESTCASE}.cu")
 set(CPU_BIN "${TESTCASE}_cpu")
@@ -51,42 +59,38 @@ execute_process(
     COMMAND ${PYTHON_EXECUTABLE} setup.py install --user
     WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
 
-add_executable(${CPU_BIN} ${CPU_SRC} ${RUNTIME_COMMON_FILES} ${RUNTIME_CPU_FILES})
-
 if(USE_WALBERLA)
-    SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_WALBERLA_LOAD_BALANCING ")
-    find_package(waLBerla REQUIRED)
-    add_subdirectory(${walberla_SOURCE_DIR} ${walberla_BINARY_DIR} EXCLUDE_FROM_ALL)
-    waLBerla_import()
-
     waLBerla_add_executable(
         NAME ${CPU_BIN}
-        FILES ${CPU_SRC} ${RUNTIME_COMMON_FILES} ${RUNTIME_CPU_FILES}
-        DEPENDS blockforest core pe ${CPU_SRC})
+        FILES ${RUNTIME_COMMON_FILES} ${RUNTIME_CPU_FILES} ${CMAKE_BINARY_DIR}/${CPU_SRC}
+        DEPENDS blockforest core pe)
+else()
+    add_executable(${CPU_BIN} ${CPU_SRC} ${RUNTIME_COMMON_FILES} ${RUNTIME_CPU_FILES})
 endif()
 
 add_library(runtime_cpu STATIC runtime/devices/dummy.cpp)
-#target_link_libraries(${CPU_BIN} runtime_cpu)
+target_link_libraries(${CPU_BIN} runtime_cpu)
 
 add_custom_command(
-    OUTPUT ${CPU_SRC}
+    OUTPUT ${CMAKE_BINARY_DIR}/${CPU_SRC}
     COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/examples/${TESTCASE}.py cpu
     COMMENT "Generate CPU code"
     DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/examples/${TESTCASE}.py)
 
-add_custom_target(gen_cpu DEPENDS ${CPU_SRC})
+add_custom_target(gen_cpu DEPENDS ${CMAKE_BINARY_DIR}/${CPU_SRC})
 add_dependencies(${CPU_BIN} gen_cpu)
 
 if(COMPILE_CUDA)
     find_package(CUDA REQUIRED)
     enable_language(CUDA)
-    add_executable(${GPU_BIN} ${GPU_SRC} ${RUNTIME_COMMON_FILES} ${RUNTIME_GPU_FILES})
 
     if(USE_WALBERLA)
         waLBerla_add_executable(
             NAME ${GPU_BIN}
-            FILES ${GPU_SRC} ${RUNTIME_COMMON_FILES}
-            DEPENDS blockforest core pe ${GPU_SRC})
+            FILES ${RUNTIME_COMMON_FILES}
+            DEPENDS blockforest core pe)
+    else()
+        add_executable(${GPU_BIN} ${GPU_SRC} ${RUNTIME_COMMON_FILES} ${RUNTIME_GPU_FILES})
     endif()
 
     if(ENABLE_GPU_DIRECT)
@@ -97,6 +101,7 @@ if(COMPILE_CUDA)
     target_compile_features(runtime_gpu PUBLIC cxx_std_11)
     set_target_properties(runtime_gpu PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
 
+    target_link_libraries(${GPU_BIN} runtime_gpu)
     target_compile_options(${GPU_BIN} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-arch=${CUDA_ARCH}>)
     target_include_directories(${GPU_BIN} PRIVATE ${CUDA_INCLUDE_DIRS})
     set_target_properties(${GPU_BIN} PROPERTIES CUDA_ARCHITECTURES ${CUDA_ARCH})
@@ -105,9 +110,6 @@ if(COMPILE_CUDA)
     target_include_directories(runtime_gpu PRIVATE ${CUDA_INCLUDE_DIRS})
     set_target_properties(runtime_gpu PROPERTIES CUDA_ARCHITECTURES ${CUDA_ARCH})
 
-    #set_property(TARGET ${GPU_BIN} PROPERTY CUDA_SEPARABLE_COMPILATION ON)
-    #target_link_libraries(${GPU_BIN} runtime_gpu)
-
     add_custom_command(
         OUTPUT ${GPU_SRC}
         COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/examples/${TESTCASE}.py gpu
@@ -122,15 +124,19 @@ target_link_libraries(${CPU_BIN} ${CMAKE_EXE_LINKER_FLAGS})
 set_target_properties(${CPU_BIN} PROPERTIES CXX_STANDARD_REQUIRED ON)
 set_target_properties(${CPU_BIN} PROPERTIES CXX_STANDARD 17)
 
-target_link_libraries(${GPU_BIN} ${CMAKE_EXE_LINKER_FLAGS})
-set_target_properties(${GPU_BIN} PROPERTIES CXX_STANDARD_REQUIRED ON)
-set_target_properties(${GPU_BIN} PROPERTIES CXX_STANDARD 17)
+if(COMPILE_CUDA)
+    target_link_libraries(${GPU_BIN} ${CMAKE_EXE_LINKER_FLAGS})
+    set_target_properties(${GPU_BIN} PROPERTIES CXX_STANDARD_REQUIRED ON)
+    set_target_properties(${GPU_BIN} PROPERTIES CXX_STANDARD 17)
+endif()
 
 if(USE_MPI)
     find_package(MPI REQUIRED)
     include_directories(SYSTEM ${MPI_INCLUDE_PATH})
     target_link_libraries(${CPU_BIN} ${MPI_LIBRARIES})
-    target_link_libraries(${GPU_BIN} ${MPI_LIBRARIES})
+    if(COMPILE_CUDA)
+        target_link_libraries(${GPU_BIN} ${MPI_LIBRARIES})
+    endif()
 endif()
 
 if(LIKWID_DIR)
diff --git a/examples/md.py b/examples/md.py
index 30a669a839e26869071d56b74c282f9124619667..828321c918a2f3119b28ae1a54f187fb5dba0ac0 100644
--- a/examples/md.py
+++ b/examples/md.py
@@ -35,7 +35,7 @@ nz = 32
 rho = 0.8442
 temp = 1.44
 
-psim = pairs.simulation("md", [pairs.point_mass()], timesteps=200, double_prec=True)
+psim = pairs.simulation("md", [pairs.point_mass()], timesteps=200, double_prec=True, debug=True)
 
 if target == 'gpu':
     psim.target(pairs.target_gpu())
diff --git a/runtime/boundary_weights.hpp b/runtime/boundary_weights.hpp
index 3678c899f46c1ae75820545016beb2e72ca611f6..7c7a7d86f8508b0ab10a2a66bd7929b03e1efb0a 100644
--- a/runtime/boundary_weights.hpp
+++ b/runtime/boundary_weights.hpp
@@ -66,6 +66,7 @@ void compute_boundary_weights(
                 (*comm_weight)++;
         }
     }
+    std::cout << "comp_weight = " << (*comp_weight) << ", comm_weight = " << (*comm_weight) << std::endl;
     #else
     real_t *position_ptr = static_cast<real_t *>(position_prop.getDevicePointer());
 
diff --git a/runtime/domain/block_forest.cpp b/runtime/domain/block_forest.cpp
index 8b8d5b5d5862d7fbe23db0a77a7d46edde1ce461..e2d73070808cf27bd1c17b52be91d8c4d5fc3b5a 100644
--- a/runtime/domain/block_forest.cpp
+++ b/runtime/domain/block_forest.cpp
@@ -79,14 +79,14 @@ void BlockForest::updateNeighborhood() {
 }
 
 void BlockForest::copyRuntimeArray(const std::string& name, void *dest, const int size) {
-    void *src = name.compare("ranks")           ? static_cast<void *>(ranks.data()) :
-                name.compare("naabbs")          ? static_cast<void *>(naabbs.data()) :
-                name.compare("aabb_offsets")    ? static_cast<void *>(aabb_offsets.data()) :
-                name.compare("aabbs")           ? static_cast<void *>(aabbs.data()) :
-                name.compare("subdom")          ? static_cast<void *>(subdom) : nullptr;
+    void *src = name.compare("ranks") == 0          ? static_cast<void *>(ranks.data()) :
+                name.compare("naabbs") == 0         ? static_cast<void *>(naabbs.data()) :
+                name.compare("aabb_offsets") == 0   ? static_cast<void *>(aabb_offsets.data()) :
+                name.compare("aabbs") == 0          ? static_cast<void *>(aabbs.data()) :
+                name.compare("subdom") == 0         ? static_cast<void *>(subdom) : nullptr;
 
     PAIRS_ASSERT(src != nullptr);
-    bool is_real = name.compare("aabbs") || name.compare("subdom");
+    bool is_real = (name.compare("aabbs") == 0) || (name.compare("subdom") == 0);
     int tsize = is_real ? sizeof(real_t) : sizeof(int);
     std::memcpy(dest, src, size * tsize);
 }
@@ -247,6 +247,11 @@ void BlockForest::initialize(int *argc, char ***argv) {
 
     if(balance_workload) {
         this->initializeWorkloadBalancer();
+    }
+}
+
+void BlockForest::update() {
+    if(balance_workload) {
         this->updateWeights();
         forest->refresh();
     }
diff --git a/runtime/domain/block_forest.hpp b/runtime/domain/block_forest.hpp
index 80d63d59f40d9e75389c9d650fe152e19db9cb79..673369d6bb925317de43717c5bc73cd5e8c3fa9c 100644
--- a/runtime/domain/block_forest.hpp
+++ b/runtime/domain/block_forest.hpp
@@ -52,6 +52,7 @@ public:
     }
 
     void initialize(int *argc, char ***argv);
+    void update();
     void finalize();
     int getWorldSize() const { return world_size; }
     int getRank() const { return rank; }
diff --git a/runtime/domain/domain_partitioning.hpp b/runtime/domain/domain_partitioning.hpp
index 5806f3a00d281dc4dac283a9e84753de854ae684..77af9b8569e8dafbc0015974ab3fb6ec0383d635 100644
--- a/runtime/domain/domain_partitioning.hpp
+++ b/runtime/domain/domain_partitioning.hpp
@@ -38,6 +38,7 @@ public:
     }
 
     virtual void initialize(int *argc, char ***argv) = 0;
+    virtual void update() = 0;
     virtual int getWorldSize() const = 0;
     virtual int getRank() const = 0;
     virtual int getNumberOfNeighborAABBs() = 0;
diff --git a/runtime/domain/regular_6d_stencil.cpp b/runtime/domain/regular_6d_stencil.cpp
index aa276793f75ab163e69e1dae067fdbc7c7aab151..ae66aca0db373bb5814182ab5bbf7018713e2c30 100644
--- a/runtime/domain/regular_6d_stencil.cpp
+++ b/runtime/domain/regular_6d_stencil.cpp
@@ -89,6 +89,8 @@ void Regular6DStencil::initialize(int *argc, char ***argv) {
     this->setBoundingBox();
 }
 
+void Regular6DStencil::update() {}
+
 void Regular6DStencil::finalize() {
     MPI_Finalize();
 }
@@ -101,19 +103,19 @@ int Regular6DStencil::isWithinSubdomain(real_t x, real_t y, real_t z) {
 
 void Regular6DStencil::copyRuntimeArray(const std::string& name, void *dest, const int size) {
     for(int d = 0; d < ndims; d++) {
-        if(name.compare("neighbor_ranks")) {
+        if(name.compare("neighbor_ranks") == 0) {
             int *neighbor_ranks = static_cast<int *>(dest);
             neighbor_ranks[d * 2 + 0] = prev[d];
             neighbor_ranks[d * 2 + 1] = next[d];
         }
 
-        if(name.compare("pbc")) {
+        if(name.compare("pbc") == 0) {
             int *pbc = static_cast<int *>(dest);
             pbc[d * 2 + 0] = pbc_prev[d];
             pbc[d * 2 + 1] = pbc_next[d];
         }
 
-        if(name.compare("subdom")) {
+        if(name.compare("subdom") == 0) {
             real_t *subdom = static_cast<real_t *>(dest);
             subdom[d * 2 + 0] = subdom_min[d];
             subdom[d * 2 + 1] = subdom_max[d];
diff --git a/runtime/domain/regular_6d_stencil.hpp b/runtime/domain/regular_6d_stencil.hpp
index b507cd0a8550c53eeb1a2f833672532bbd94df8b..5b91d7a53c73f75edb4671deb9dba9069b68a9c6 100644
--- a/runtime/domain/regular_6d_stencil.hpp
+++ b/runtime/domain/regular_6d_stencil.hpp
@@ -51,6 +51,7 @@ public:
     void setConfig();
     void setBoundingBox();
     void initialize(int *argc, char ***argv);
+    void update();
     void finalize();
 
     int getWorldSize() const { return world_size; }
diff --git a/runtime/pairs.hpp b/runtime/pairs.hpp
index 9af0f9737707b239e11f21363ac656511fab2c2f..04f8f69ccd390499c64ebbd8c6efe1bfc1b57190 100644
--- a/runtime/pairs.hpp
+++ b/runtime/pairs.hpp
@@ -303,6 +303,8 @@ public:
         int *argc, char ***argv,
         real_t xmin, real_t xmax, real_t ymin, real_t ymax, real_t zmin, real_t zmax);
 
+    void updateDomain() { dom_part->update(); }
+
     DomainPartitioner *getDomainPartitioner() { return dom_part; }
     void communicateSizes(int dim, const int *send_sizes, int *recv_sizes);
 
diff --git a/src/pairs/analysis/__init__.py b/src/pairs/analysis/__init__.py
index 846843c3c72ee73a6359a74f397195eed453f8f2..99e7a80d768f74fa24f007358ad2976e9a35c006 100644
--- a/src/pairs/analysis/__init__.py
+++ b/src/pairs/analysis/__init__.py
@@ -6,6 +6,8 @@ from pairs.analysis.modules import FetchModulesReferences
 
 
 class Analysis:
+    """Compiler analysis performed on P4IRS"""
+
     def __init__(self, ast):
         self._ast = ast
 
diff --git a/src/pairs/sim/domain.py b/src/pairs/sim/domain.py
index 8126b58fea395785fcf5e486ca9573afa0d8049d..2e0d9877d28ca5f2dfd763ef21b20307ab5d752e 100644
--- a/src/pairs/sim/domain.py
+++ b/src/pairs/sim/domain.py
@@ -9,3 +9,12 @@ class InitializeDomain(Lowerable):
     @pairs_inline
     def lower(self):
         self.sim.domain_partitioning().initialize()
+
+
+class UpdateDomain(Lowerable):
+    def __init__(self, sim):
+        super().__init__(sim)
+
+    @pairs_inline
+    def lower(self):
+        self.sim.domain_partitioning().update()
diff --git a/src/pairs/sim/domain_partitioning.py b/src/pairs/sim/domain_partitioning.py
index 93822feacca8a32fb2e4a0a093ce2cda2cb87bde..83875af3847a2a61d787f910a270435835f6051a 100644
--- a/src/pairs/sim/domain_partitioning.py
+++ b/src/pairs/sim/domain_partitioning.py
@@ -36,6 +36,9 @@ class DimensionRanges:
         Call_Void(self.sim, "pairs->copyRuntimeArray", ['pbc', self.pbc, self.sim.ndims() * 2])
         Call_Void(self.sim, "pairs->copyRuntimeArray", ['subdom', self.subdom, self.sim.ndims() * 2])
 
+    def update(self):
+        Call_Void(self.sim, "pairs->updateDomain", [])
+
     def ghost_particles(self, step, position, offset=0.0):
         # Particles with one of the following flags are ignored
         flags_to_exclude = (Flags.Infinite | Flags.Global)
@@ -99,6 +102,8 @@ class BlockForest:
         grid_array = [(self.sim.grid.min(d), self.sim.grid.max(d)) for d in range(self.sim.ndims())]
         Call_Void(self.sim, "pairs->initDomain", [param for delim in grid_array for param in delim])
 
+    def update(self):
+        Call_Void(self.sim, "pairs->updateDomain", [])
         Assign(self.sim, self.nranks, Call_Int(self.sim, "pairs->getNumberOfNeighborRanks", []))
         Assign(self.sim, self.ntotal_aabbs, Call_Int(self.sim, "pairs->getNumberOfNeighborAABBs", []))
 
diff --git a/src/pairs/sim/simulation.py b/src/pairs/sim/simulation.py
index d8d751818829e0aa343967218eeabd2df969c51f..faacef8eb79f2122f975db60a1b58378194578b5 100644
--- a/src/pairs/sim/simulation.py
+++ b/src/pairs/sim/simulation.py
@@ -17,7 +17,7 @@ from pairs.sim.comm import Comm
 from pairs.sim.contact_history import ContactHistory, BuildContactHistory, ClearUnusedContactHistory, ResetContactHistoryUsageStatus
 from pairs.sim.copper_fcc_lattice import CopperFCCLattice
 from pairs.sim.dem_sc_grid import DEMSCGrid
-from pairs.sim.domain import InitializeDomain
+from pairs.sim.domain import InitializeDomain, UpdateDomain
 from pairs.sim.domain_partitioners import DomainPartitioners
 from pairs.sim.domain_partitioning import BlockForest, DimensionRanges
 from pairs.sim.features import AllocateFeatureProperties
@@ -243,7 +243,7 @@ class Simulation:
 
     def add_var(self, var_name, var_type, init_value=0, runtime=False):
         assert self.var(var_name) is None, f"Variable already defined: {var_name}"
-        return self.vars.add(var_name, var_type, init_value)
+        return self.vars.add(var_name, var_type, init_value, runtime)
 
     def add_temp_var(self, init_value):
         return self.vars.add_temp(init_value)
@@ -436,6 +436,8 @@ class Simulation:
         comm = Comm(self, self._dom_part)
         # Params that determine when a method must be called only when reneighboring
         every_reneighbor_params = {'every': self.reneighbor_frequency}
+        # Update domain is added at last on setups because particles must be already present in the simulation
+        self.setups.add_statement(UpdateDomain(self))
 
         # First steps executed during each time-step in the simulation
         timestep_procedures = self.pre_step_functions + [