diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9b710790537b8049d0567346796c6a42e768cb85..17a08e09013f43a0b304f0c973bfc4651a380ebb 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -71,6 +71,12 @@ endif()
 file(COPY runtime DESTINATION ${CMAKE_BINARY_DIR})
 file(COPY data DESTINATION ${CMAKE_BINARY_DIR})
 
+set(OUTPUT_DIR ${CMAKE_BINARY_DIR}/output)
+if(EXISTS ${OUTPUT_DIR})
+    file(REMOVE_RECURSE ${OUTPUT_DIR})
+endif()
+file(MAKE_DIRECTORY ${OUTPUT_DIR})
+
 execute_process(
     COMMAND ${PYTHON_EXECUTABLE} setup.py build
     WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
diff --git a/examples/main.cpp b/examples/main.cpp
index 991cbecbe5aa7eb088f878a756f13fa97341b378..f3841e3a1534a68ac66827f8357bc343ab229e60 100644
--- a/examples/main.cpp
+++ b/examples/main.cpp
@@ -1,6 +1,6 @@
 #include <iostream>
 //---
-#include "md.hpp"
+#include "dem_sd.hpp"
 
 int main(int argc, char **argv) {
     PairsSimulation *ps = new PairsSimulation();
diff --git a/runtime/boundary_weights.cpp b/runtime/boundary_weights.cpp
index 527789e5316d2f26f5285d8c8da1fc9beb4fa987..1a714d17794f9e5d89612b4034ee605106e466a3 100644
--- a/runtime/boundary_weights.cpp
+++ b/runtime/boundary_weights.cpp
@@ -57,7 +57,7 @@ void compute_boundary_weights(
                 (*comm_weight)++;
         }
     }
-    std::cout << "comp_weight = " << (*comp_weight) << ", comm_weight = " << (*comm_weight) << std::endl;
+    // std::cout << "comp_weight = " << (*comp_weight) << ", comm_weight = " << (*comm_weight) << std::endl;
     #else
     real_t *position_ptr = static_cast<real_t *>(position_prop.getDevicePointer());
 
diff --git a/runtime/domain/block_forest.cpp b/runtime/domain/block_forest.cpp
index 1ce021efd1f3814915e990b77ac0ef1f8e4f7486..553d76b48c274d4b968b2a256924086aea99c107 100644
--- a/runtime/domain/block_forest.cpp
+++ b/runtime/domain/block_forest.cpp
@@ -24,7 +24,7 @@ namespace pairs {
 void BlockForest::updateNeighborhood() {
     std::map<int, std::vector<walberla::math::AABB>> neighborhood;
     std::map<int, std::vector<walberla::BlockID>> blocks_pushed;
-    auto me = walberla::mpi::MPIManager::instance()->rank();
+    auto me = mpiManager->rank();
     this->nranks = 0;
     this->total_aabbs = 0;
 
@@ -41,7 +41,7 @@ void BlockForest::updateNeighborhood() {
             for(uint neigh = 0; neigh < block->getNeighborhoodSize(); ++neigh) {
                 auto neighbor_rank = walberla::int_c(block->getNeighborProcess(neigh));
 
-                if(neighbor_rank != me) {
+                // if(neighbor_rank != me) {
                     const walberla::BlockID& neighbor_block = block->getNeighborId(neigh);
                     walberla::math::AABB neighbor_aabb = block->getNeighborAABB(neigh);
                     auto neighbor_info = (*info)[neighbor_block];
@@ -55,7 +55,7 @@ void BlockForest::updateNeighborhood() {
                         neighborhood[neighbor_rank].push_back(neighbor_aabb);
                         blocks_pushed[neighbor_rank].push_back(neighbor_block);
                     }
-                }
+                // }
             }
         }
     }
@@ -95,7 +95,7 @@ void BlockForest::copyRuntimeArray(const std::string& name, void *dest, const in
 }
 
 void BlockForest::updateWeights() {
-    walberla::mpi::BufferSystem bs(walberla::mpi::MPIManager::instance()->comm(), 756);
+    walberla::mpi::BufferSystem bs(mpiManager->comm(), 756);
 
     info->clear();
     for(auto& iblock: *forest) {
@@ -226,13 +226,11 @@ void BlockForest::rebalance() {
 }
 
 void BlockForest::initialize(int *argc, char ***argv) {
-    MPI_Init(argc, argv);
-    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-
-    auto mpiManager = walberla::mpi::MPIManager::instance();
+    mpiManager = walberla::mpi::MPIManager::instance();
     mpiManager->initializeMPI(argc, argv);
     mpiManager->useWorldComm();
+    world_size = mpiManager->numProcesses();
+    rank = mpiManager->rank();
 
     walberla::math::AABB domain(
         grid_min[0], grid_min[1], grid_min[2], grid_max[0], grid_max[1], grid_max[2]);
@@ -244,7 +242,7 @@ void BlockForest::initialize(int *argc, char ***argv) {
     auto ref_level = balance_workload ? getInitialRefinementLevel(procs) : 0;
 
     forest = walberla::blockforest::createBlockForest(
-        domain, block_config, walberla::Vector3<bool>(true, true, true), procs, ref_level);
+        domain, block_config, walberla::Vector3<bool>(globalPBC[0], globalPBC[1], globalPBC[2]), procs, ref_level);
 
     this->info = make_shared<walberla::blockforest::InfoCollection>();
 
@@ -345,7 +343,7 @@ void BlockForest::initializeWorkloadBalancer() {
 }
 
 void BlockForest::finalize() {
-    MPI_Finalize();
+    mpiManager->finalizeMPI();
 }
 
 int BlockForest::isWithinSubdomain(real_t x, real_t y, real_t z) {
@@ -361,19 +359,28 @@ int BlockForest::isWithinSubdomain(real_t x, real_t y, real_t z) {
 }
 
 void BlockForest::communicateSizes(int dim, const int *nsend, int *nrecv) {
-    std::vector<MPI_Request> send_requests(ranks.size(), MPI_REQUEST_NULL);
-    std::vector<MPI_Request> recv_requests(ranks.size(), MPI_REQUEST_NULL);
+    std::vector<MPI_Request> send_requests;
+    std::vector<MPI_Request> recv_requests;
     size_t nranks = 0;
 
     for(auto neigh_rank: ranks) {
-        MPI_Irecv(&nrecv[nranks], 1, MPI_INT, neigh_rank, 0, MPI_COMM_WORLD, &recv_requests[nranks]);
-        MPI_Isend(&nsend[nranks], 1, MPI_INT, neigh_rank, 0, MPI_COMM_WORLD, &send_requests[nranks]);
+        if(neigh_rank != rank) {
+            MPI_Request send_req, recv_req;
+            MPI_Irecv(&nrecv[nranks], 1, MPI_INT, neigh_rank, 0, MPI_COMM_WORLD, &recv_req);
+            MPI_Isend(&nsend[nranks], 1, MPI_INT, neigh_rank, 0, MPI_COMM_WORLD, &send_req);
+            send_requests.push_back(send_req);
+            recv_requests.push_back(recv_req);
+        } else {
+            nrecv[nranks] = nsend[nranks];
+        }
         nranks++;
     }
 
-    if(nranks > 0) {
-        MPI_Waitall(nranks, send_requests.data(), MPI_STATUSES_IGNORE);
-        MPI_Waitall(nranks, recv_requests.data(), MPI_STATUSES_IGNORE);
+    if(!send_requests.empty()) {
+        MPI_Waitall(send_requests.size(), send_requests.data(), MPI_STATUSES_IGNORE);
+    }
+    if(!recv_requests.empty()) {
+        MPI_Waitall(recv_requests.size(), recv_requests.data(), MPI_STATUSES_IGNORE);
     }
 }
 
diff --git a/runtime/domain/block_forest.hpp b/runtime/domain/block_forest.hpp
index 15aac5b59519f87d8e0ae85a6633cf88ecdfd910..a92eb9b02b77252798436530a81c8361387050f4 100644
--- a/runtime/domain/block_forest.hpp
+++ b/runtime/domain/block_forest.hpp
@@ -22,6 +22,7 @@ class PairsRuntime;
 
 class BlockForest : public DomainPartitioner {
 private:
+    std::shared_ptr<walberla::mpi::MPIManager> mpiManager;
     std::shared_ptr<walberla::BlockForest> forest;
     std::shared_ptr<walberla::blockforest::InfoCollection> info;
     std::vector<int> ranks;
@@ -30,14 +31,15 @@ private:
     std::vector<double> aabbs;
     PairsRuntime *ps;
     real_t *subdom;
+    const bool globalPBC[3];
     int world_size, rank, nranks, total_aabbs;
     bool balance_workload;
 
 public:
     BlockForest(
         PairsRuntime *ps_,
-        real_t xmin, real_t xmax, real_t ymin, real_t ymax, real_t zmin, real_t zmax) :
-        DomainPartitioner(xmin, xmax, ymin, ymax, zmin, zmax), ps(ps_) {
+        real_t xmin, real_t xmax, real_t ymin, real_t ymax, real_t zmin, real_t zmax, bool pbcx, bool pbcy, bool pbcz) :
+        DomainPartitioner(xmin, xmax, ymin, ymax, zmin, zmax), ps(ps_), globalPBC{pbcx, pbcy, pbcz} {
 
         subdom = new real_t[ndims * 2];
     }
diff --git a/runtime/pairs.cpp b/runtime/pairs.cpp
index 958f781e983fa4e26d32356398935b608e99264b..8127602f8cdfd5af129fe35ce1bc99bae2ee52b6 100644
--- a/runtime/pairs.cpp
+++ b/runtime/pairs.cpp
@@ -14,7 +14,7 @@ namespace pairs {
 
 void PairsRuntime::initDomain(
     int *argc, char ***argv,
-    real_t xmin, real_t xmax, real_t ymin, real_t ymax, real_t zmin, real_t zmax) {
+    real_t xmin, real_t xmax, real_t ymin, real_t ymax, real_t zmin, real_t zmax, bool pbcx, bool pbcy, bool pbcz) {
 
     if(dom_part_type == RegularPartitioning) {
         const int flags[] = {1, 1, 1};
@@ -23,7 +23,7 @@ void PairsRuntime::initDomain(
         const int flags[] = {1, 1, 0};
         dom_part = new Regular6DStencil(xmin, xmax, ymin, ymax, zmin, zmax, flags);
     } else if(dom_part_type == BlockForestPartitioning) {
-        dom_part = new BlockForest(this, xmin, xmax, ymin, ymax, zmin, zmax);
+        dom_part = new BlockForest(this, xmin, xmax, ymin, ymax, zmin, zmax, pbcx, pbcy, pbcz);
     } else {
         PAIRS_EXCEPTION("Domain partitioning type not implemented!\n");
     }
@@ -392,11 +392,20 @@ void PairsRuntime::communicateData(
     #else
     int nsend_all = 0;
     int nrecv_all = 0;
-    for(int d = 0; d <= dim; d++) {
-        nsend_all += nsend[d * 2 + 0];
-        nsend_all += nsend[d * 2 + 1];
-        nrecv_all += nrecv[d * 2 + 0];
-        nrecv_all += nrecv[d * 2 + 1];
+    if(this->dom_part_type == RegularPartitioning || this->dom_part_type == RegularXYPartitioning){
+        for(int d = 0; d <= dim; d++) {
+            nsend_all += nsend[d * 2 + 0];
+            nsend_all += nsend[d * 2 + 1];
+            nrecv_all += nrecv[d * 2 + 0];
+            nrecv_all += nrecv[d * 2 + 1];
+        }
+    }
+    else if (this->dom_part_type == BlockForestPartitioning){
+        int nranks = this->getDomainPartitioner()->getNumberOfNeighborRanks();
+        for (int n=0; n<nranks; ++n){
+            nsend_all += nsend[n];
+            nrecv_all += nrecv[n];
+        }
     }
 
     copyArrayToHost(send_buf_id, Ignore, nsend_all * elem_size * sizeof(real_t));
@@ -446,11 +455,20 @@ void PairsRuntime::communicateAllData(
     #else
     int nsend_all = 0;
     int nrecv_all = 0;
-    for(int d = 0; d <= ndims; d++) {
-        nsend_all += nsend[d * 2 + 0];
-        nsend_all += nsend[d * 2 + 1];
-        nrecv_all += nrecv[d * 2 + 0];
-        nrecv_all += nrecv[d * 2 + 1];
+    if(this->dom_part_type == RegularPartitioning || this->dom_part_type == RegularXYPartitioning){
+        for(int d = 0; d <= ndims; d++) {
+            nsend_all += nsend[d * 2 + 0];
+            nsend_all += nsend[d * 2 + 1];
+            nrecv_all += nrecv[d * 2 + 0];
+            nrecv_all += nrecv[d * 2 + 1];
+        }
+    }
+    else if (this->dom_part_type == BlockForestPartitioning){
+        int nranks = this->getDomainPartitioner()->getNumberOfNeighborRanks();
+        for (int n=0; n<nranks; ++n){
+            nsend_all += nsend[n];
+            nrecv_all += nrecv[n];
+        }
     }
 
     copyArrayToHost(send_buf_id, Ignore, nsend_all * elem_size * sizeof(real_t));
diff --git a/runtime/pairs.hpp b/runtime/pairs.hpp
index d7716131f2f3d9942607d90472a0534fe1e9a538..2bcf71f047f6b32842306c4505699c50aaa5c3da 100644
--- a/runtime/pairs.hpp
+++ b/runtime/pairs.hpp
@@ -301,7 +301,8 @@ public:
     // Communication
     void initDomain(
         int *argc, char ***argv,
-        real_t xmin, real_t xmax, real_t ymin, real_t ymax, real_t zmin, real_t zmax);
+        real_t xmin, real_t xmax, real_t ymin, real_t ymax, real_t zmin, real_t zmax,
+        bool pbcx = 0, bool pbcy = 0, bool pbcz = 0);
 
     void updateDomain() { dom_part->update(); }
 
diff --git a/src/pairs/code_gen/cgen.py b/src/pairs/code_gen/cgen.py
index df4816f0543933900633f6b9a7f92b295f1e3481..fec4fd8bde54a6f7930d1c26f7ca4020ccf82b8a 100644
--- a/src/pairs/code_gen/cgen.py
+++ b/src/pairs/code_gen/cgen.py
@@ -26,7 +26,7 @@ from pairs.ir.properties import Property, PropertyAccess, RegisterProperty, Real
 from pairs.ir.select import Select
 from pairs.ir.sizeof import Sizeof
 from pairs.ir.types import Types
-from pairs.ir.utils import Print
+from pairs.ir.print import Print, PrintCode
 from pairs.ir.variables import Var, DeclareVariable, Deref
 from pairs.ir.vectors import Vector, VectorAccess, VectorOp, ZeroVector
 from pairs.sim.domain_partitioners import DomainPartitioners
@@ -301,6 +301,8 @@ class CGen:
         self.print("    void end() {")
         self.print("        pairs::print_timers(pairs_runtime);")
         self.print("        pairs::print_stats(pairs_runtime, pobj->nlocal, pobj->nghost);")
+        self.print("        delete pobj;")
+        self.print("        delete pairs_runtime;")
         self.print("    }")
         self.print("};")
         self.print.end()
@@ -759,7 +761,24 @@ class CGen:
             self.print(f"{ast_node.module.name}(pobj);")
 
         if isinstance(ast_node, Print):
-            self.print(f"PAIRS_DEBUG(\"{ast_node.string}\\n\");")
+            args = ast_node.args
+            exprs = [self.generate_expression(arg) for arg in args]
+            toPrint = "PAIRS_DEBUG(\""
+            for arg in args:
+                if Types.is_real(arg.type()):
+                    format = "%f "
+                elif Types.is_integer(arg.type()):
+                    format = "%d "
+                else:
+                    format = "%s "
+                toPrint += format
+
+            toPrint = toPrint + "\\n\", " + ", ".join(map(str, exprs)) + ");"
+            self.print(toPrint)
+
+        if isinstance(ast_node, PrintCode):
+            toPrint = self.generate_expression(ast_node.arg)
+            self.print(toPrint[1:-1])
 
         if isinstance(ast_node, Realloc):
             tkw = Types.c_keyword(self.sim, ast_node.array.type())
@@ -948,6 +967,12 @@ class CGen:
             assert mem is False, "Literal is not lvalue!"
             if ast_node.type() == Types.String:
                 return f"\"{ast_node.value}\""
+            
+            if ast_node.type() == Types.Boolean:
+                if ast_node.value == True:
+                    return "true"
+                if ast_node.value == False:
+                    return "false"
 
             if not ast_node.is_scalar():
                 assert index is not None, "Index must be set for non-scalar literals."
diff --git a/src/pairs/ir/mutator.py b/src/pairs/ir/mutator.py
index bbe06f9ce371d818111cf597ab36e99f65fe5137..669dbc44792e36f5af75c6be2c35b2347bd48427 100644
--- a/src/pairs/ir/mutator.py
+++ b/src/pairs/ir/mutator.py
@@ -55,6 +55,14 @@ class Mutator:
 
         return ast_node
 
+    def mutate_Print(self, ast_node):
+        ast_node.args = [self.mutate(arg) for arg in ast_node.args]
+        return ast_node
+
+    def mutate_PrintCode(self, ast_node):
+        ast_node.arg = self.mutate(ast_node.arg)
+        return ast_node
+
     def mutate_ArrayAccess(self, ast_node):
         ast_node.array = self.mutate(ast_node.array)
         ast_node.partial_indexes = [self.mutate(i) for i in ast_node.partial_indexes]
diff --git a/src/pairs/ir/utils.py b/src/pairs/ir/utils.py
index 33495668bc00d74021af97b24689513f86357bd5..2a0a707eebd7546745f6dd4017449ead7beb51cb 100644
--- a/src/pairs/ir/utils.py
+++ b/src/pairs/ir/utils.py
@@ -12,11 +12,3 @@ def is_terminal(node):
     terminal_types = (Array, ContactProperty, FeatureProperty, Iter, Neighbor, Property, Symbol, Var)
     return any([isinstance(node, _type) for _type in terminal_types])
 
-
-class Print(ASTNode):
-    def __init__(self, sim, string):
-        super().__init__(sim)
-        self.string = string
-
-    def __str__(self):
-        return f"Print<{self.string}>"
diff --git a/src/pairs/mapping/funcs.py b/src/pairs/mapping/funcs.py
index f77410043a102cf72a5719bec54e482ccb336afc..0632bd2fee3b3233c117211b701bf4cf96a376ea 100644
--- a/src/pairs/mapping/funcs.py
+++ b/src/pairs/mapping/funcs.py
@@ -178,7 +178,7 @@ class BuildParticleIR(ast.NodeVisitor):
 
     def visit_If(self, node):
         condition = self.visit(node.test)
-        one_way = node.orelse is None
+        one_way = node.orelse is None or len(node.orelse) == 0
 
         if one_way:
             for _ in Filter(self.sim, condition):
diff --git a/src/pairs/mapping/keywords.py b/src/pairs/mapping/keywords.py
index ad10c960b8bf87b3f828417f7f8c3049727b23f5..51c255de3767f567182afa8dc5616e5f85151604 100644
--- a/src/pairs/mapping/keywords.py
+++ b/src/pairs/mapping/keywords.py
@@ -10,6 +10,7 @@ from pairs.ir.quaternions import Quaternion
 from pairs.ir.scalars import ScalarOp
 from pairs.ir.select import Select
 from pairs.ir.types import Types
+from pairs.ir.print import Print
 from pairs.ir.vectors import Vector, ZeroVector
 from pairs.sim.shapes import Shapes
 
@@ -30,6 +31,9 @@ class Keywords:
     def exists(self, keyword):
         method = self.get_method(f"keyword_{keyword}")
         return method is not None
+    
+    def keyword_printf(self, args):
+        Print(self.sim, *args)
 
     def keyword_is_point_mass(self, args):
         assert len(args) == 1, "is_point_mass() keyword requires one parameter."
diff --git a/src/pairs/sim/cell_lists.py b/src/pairs/sim/cell_lists.py
index 016ca1a96892e41502d97237a03edafec02d36be..e8b2d2ead9930eba21d7dea41eedf6bff00441ce 100644
--- a/src/pairs/sim/cell_lists.py
+++ b/src/pairs/sim/cell_lists.py
@@ -11,7 +11,7 @@ from pairs.ir.math import Ceil
 from pairs.ir.scalars import ScalarOp
 from pairs.ir.select import Select
 from pairs.ir.types import Types
-from pairs.ir.utils import Print
+from pairs.ir.print import Print
 from pairs.sim.flags import Flags
 from pairs.sim.lowerable import Lowerable
 
diff --git a/src/pairs/sim/comm.py b/src/pairs/sim/comm.py
index ed73805a7c8a36a2807890f676ed5dc443029ad7..dace3c9aeed7be47f1c120dcd05034628c6b1f64 100644
--- a/src/pairs/sim/comm.py
+++ b/src/pairs/sim/comm.py
@@ -9,7 +9,7 @@ from pairs.ir.contexts import Contexts
 from pairs.ir.device import CopyArray
 from pairs.ir.functions import Call_Void
 from pairs.ir.loops import For, ParticleFor, While
-from pairs.ir.utils import Print
+from pairs.ir.print import Print
 from pairs.ir.select import Select
 from pairs.ir.sizeof import Sizeof
 from pairs.ir.types import Types
@@ -273,18 +273,18 @@ class SetCommunicationOffsets(Lowerable):
         recv_offsets = self.comm.recv_offsets
         self.sim.module_name(f"set_communication_offsets{self.step}")
 
-        isend = 0
-        irecv = 0
+        isend = self.sim.add_temp_var(0)
+        irecv = self.sim.add_temp_var(0)
         for i in range(self.step):
             for j in self.comm.dom_part.step_indexes(i):
-                isend += nsend[j]
-                irecv += nrecv[j]
+                Assign(self.sim, isend, isend + nsend[j])
+                Assign(self.sim, irecv, irecv + nrecv[j])
 
         for j in self.comm.dom_part.step_indexes(self.step):
             Assign(self.sim, send_offsets[j], isend)
             Assign(self.sim, recv_offsets[j], irecv)
-            isend += nsend[j]
-            irecv += nrecv[j]
+            Assign(self.sim, isend, isend + nsend[j])
+            Assign(self.sim, irecv, irecv + nrecv[j])
 
 
 class PackGhostParticles(Lowerable):
diff --git a/src/pairs/sim/contact_history.py b/src/pairs/sim/contact_history.py
index 9a1a94d76e41ce8bd045ace9df45fcb45e0e8838..51b76f49d5bd9e2458f1af57d241a9b485a80bdc 100644
--- a/src/pairs/sim/contact_history.py
+++ b/src/pairs/sim/contact_history.py
@@ -4,7 +4,7 @@ from pairs.ir.branches import Branch, Filter
 from pairs.ir.loops import ParticleFor, For, While
 from pairs.ir.scalars import ScalarOp
 from pairs.ir.types import Types
-from pairs.ir.utils import Print
+from pairs.ir.print import Print
 from pairs.sim.interaction import NeighborFor
 from pairs.sim.lowerable import Lowerable
 
diff --git a/src/pairs/sim/domain_partitioning.py b/src/pairs/sim/domain_partitioning.py
index 973e63dec4d2dea87782e41e18453c099661169d..e781d235fe82a5dea66e1854ed3cbe33294f801f 100644
--- a/src/pairs/sim/domain_partitioning.py
+++ b/src/pairs/sim/domain_partitioning.py
@@ -6,6 +6,7 @@ from pairs.ir.scalars import ScalarOp
 from pairs.ir.select import Select
 from pairs.ir.types import Types
 from pairs.sim.flags import Flags
+from pairs.ir.lit import Lit
 
 
 class DimensionRanges:
@@ -86,10 +87,11 @@ class DimensionRanges:
 class BlockForest:
     def __init__(self, sim):
         self.sim                = sim
+        self.rank               = sim.add_var('rank', Types.Int32)
         self.nranks             = sim.add_var('nranks', Types.Int32)
-        self.nranks_capacity    = sim.add_var('nranks_capacity', Types.Int32)
+        self.nranks_capacity    = sim.add_var('nranks_capacity', Types.Int32, init_value=27)
         self.ntotal_aabbs       = sim.add_var('ntotal_aabbs', Types.Int32)
-        self.aabb_capacity      = sim.add_var('aabb_capacity', Types.Int32)
+        self.aabb_capacity      = sim.add_var('aabb_capacity', Types.Int32, init_value=27)
         self.ranks              = sim.add_array('ranks', [self.nranks_capacity], Types.Int32)
         self.naabbs             = sim.add_array('naabbs', [self.nranks_capacity], Types.Int32)
         self.aabb_offsets       = sim.add_array('aabb_offsets', [self.nranks_capacity], Types.Int32)
@@ -123,10 +125,11 @@ class BlockForest:
 
     def initialize(self):
         grid_array = [(self.sim.grid.min(d), self.sim.grid.max(d)) for d in range(self.sim.ndims())]
-        Call_Void(self.sim, "pairs_runtime->initDomain", [param for delim in grid_array for param in delim])
+        Call_Void(self.sim, "pairs_runtime->initDomain", [param for delim in grid_array for param in delim] + self.sim._pbc)
 
     def update(self):
         Call_Void(self.sim, "pairs_runtime->updateDomain", [])
+        Assign(self.sim, self.rank, Call_Int(self.sim, "pairs_runtime->getDomainPartitioner()->getRank", []))
         Assign(self.sim, self.nranks, Call_Int(self.sim, "pairs_runtime->getNumberOfNeighborRanks", []))
         Assign(self.sim, self.ntotal_aabbs, Call_Int(self.sim, "pairs_runtime->getNumberOfNeighborAABBs", []))
 
@@ -147,33 +150,65 @@ class BlockForest:
         Call_Void(self.sim, "pairs_runtime->copyRuntimeArray", ['subdom', self.subdom, self.sim.ndims() * 2])
 
     def ghost_particles(self, step, position, offset=0.0):
+        ''' TODO :  If we have pbc, a sinlge particle can be a ghost particle multiple times (at different locations) for the same neighbor block,
+                    so this function should have the capability to yield more than one particle for every neighbor.
+                    But currently it doesn't have that capability, so we need at least 2 blocks in the dimensions that we have pbc.
+                    (eg: a particle in a 1x1x1 block config with pbc <ture, true, true> can be ghost at 7 other locations)
+        '''
         # Particles with one of the following flags are ignored
         flags_to_exclude = (Flags.Infinite | Flags.Global)
 
-        for r in For(self.sim, 0, self.nranks):
-            for i in For(self.sim, 0, self.sim.nlocal):
+        for r in For(self.sim, 0, self.nranks):     # for every neighbor rank
+            for i in For(self.sim, 0, self.sim.nlocal):     # for every local particle in this rank
                 particle_flags = self.sim.particle_flags
 
                 for _ in Filter(self.sim, ScalarOp.cmp(particle_flags[i] & flags_to_exclude, 0)):
-                    for aabb_id in For(self.sim, self.aabb_offsets[r], self.aabb_offsets[r] + self.naabbs[r]):
-                        full_cond = None
-                        pbc_shifts = []
-
-                        for d in range(self.sim.ndims()):
-                            aabb_min = self.aabbs[aabb_id][d * 2 + 0]
-                            aabb_max = self.aabbs[aabb_id][d * 2 + 1]
-                            center = aabb_min + (aabb_max - aabb_min) * 0.5
-                            dist = position[i][d] - center
-                            d_length = self.sim.grid.length(d)
-
-                            cond_pbc_neg = dist >  (d_length * 0.5)
-                            cond_pbc_pos = dist < -(d_length * 0.5)
-                            d_pbc = Select(self.sim, cond_pbc_neg, -1, Select(self.sim, cond_pbc_pos, 1, 0))
-
-                            adj_pos = position[i][d] + d_pbc * d_length
-                            d_cond = ScalarOp.and_op(adj_pos > aabb_min - offset, adj_pos < aabb_max + offset)
-                            full_cond = d_cond if full_cond is None else ScalarOp.and_op(full_cond, d_cond)
-                            pbc_shifts.append(d_pbc)
-
-                        for _ in Filter(self.sim, full_cond):
-                            yield i, r, self.ranks[r], pbc_shifts
+                    for aabb_id in For(self.sim, self.aabb_offsets[r], self.aabb_offsets[r] + self.naabbs[r]): # for every aabb of this neighbor
+                        for _ in Filter(self.sim, ScalarOp.neq(self.ranks[r] , self.rank)):     # if my neighobr is not my own rank
+                            full_cond = None
+                            pbc_shifts = []
+
+                            for d in range(self.sim.ndims()):
+                                aabb_min = self.aabbs[aabb_id][d * 2 + 0]
+                                aabb_max = self.aabbs[aabb_id][d * 2 + 1]
+                                d_pbc = 0
+                                d_length = self.sim.grid.length(d)
+
+                                if self.sim._pbc[d]:
+                                    center = aabb_min + (aabb_max - aabb_min) * 0.5     # center of neighbor block
+                                    dist = position[i][d] - center                      # distance of our particle from center of neighbor
+                                    cond_pbc_neg = dist >  (d_length * 0.5)
+                                    cond_pbc_pos = dist < -(d_length * 0.5)
+
+                                    d_pbc = Select(self.sim, cond_pbc_neg, -1, Select(self.sim, cond_pbc_pos, 1, 0))
+
+                                adj_pos = position[i][d] + d_pbc * d_length 
+                                d_cond = ScalarOp.and_op(adj_pos > aabb_min - offset, adj_pos < aabb_max + offset)
+                                full_cond = d_cond if full_cond is None else ScalarOp.and_op(full_cond, d_cond)
+                                pbc_shifts.append(d_pbc)
+
+                            for _ in Filter(self.sim, full_cond):
+                                yield i, r, self.ranks[r], pbc_shifts
+
+                        for _ in Filter(self.sim, ScalarOp.cmp(self.ranks[r] , self.rank)):     # if my neighbor is me (cuz I'm the only rank in a dimension that has pbc)
+                            pbc_shifts = []
+                            isghost = Lit(self.sim, 0)
+
+                            for d in range(self.sim.ndims()):
+                                aabb_min = self.aabbs[aabb_id][d * 2 + 0]
+                                aabb_max = self.aabbs[aabb_id][d * 2 + 1]
+                                center = aabb_min + (aabb_max - aabb_min) * 0.5     # center of neighbor block
+                                dist = position[i][d] - center                      # distance of our particle from center of neighbor
+                                d_pbc = 0
+                                d_length = self.sim.grid.length(d)
+
+                                if self.sim._pbc[d]:
+                                    cond_pbc_neg = dist >  (d_length*0.5 - offset)
+                                    cond_pbc_pos = dist < -(d_length*0.5 - offset)
+                                    d_pbc = Select(self.sim, cond_pbc_neg, -1, Select(self.sim, cond_pbc_pos, 1, 0))
+                                    isghost = ScalarOp.or_op(isghost, d_pbc)
+
+                                pbc_shifts.append(d_pbc)
+                            
+                            for _ in Filter(self.sim, isghost):
+                                yield i, r, self.ranks[r], pbc_shifts
diff --git a/src/pairs/sim/neighbor_lists.py b/src/pairs/sim/neighbor_lists.py
index 5662522b2c2d178319b6aea8f0ad12d92d0960f9..bc50e7e796355f3d7163a8b8b9080e83a58810f1 100644
--- a/src/pairs/sim/neighbor_lists.py
+++ b/src/pairs/sim/neighbor_lists.py
@@ -4,7 +4,7 @@ from pairs.ir.branches import Branch, Filter
 from pairs.ir.layouts import Layouts
 from pairs.ir.loops import ParticleFor
 from pairs.ir.types import Types
-from pairs.ir.utils import Print
+from pairs.ir.print import Print
 from pairs.sim.interaction import ParticleInteraction
 from pairs.sim.lowerable import Lowerable
 
diff --git a/src/pairs/sim/properties.py b/src/pairs/sim/properties.py
index 775fe19a69a39c0c8278a3aa4a77228b3073e897..85eb027763f203a9fdfea9b0d31db826944b46c4 100644
--- a/src/pairs/sim/properties.py
+++ b/src/pairs/sim/properties.py
@@ -4,7 +4,7 @@ from pairs.ir.loops import ParticleFor
 from pairs.ir.memory import Malloc, Realloc
 from pairs.ir.properties import RegisterProperty, RegisterContactProperty
 from pairs.ir.types import Types
-from pairs.ir.utils import Print
+from pairs.ir.print import Print
 from pairs.sim.lowerable import Lowerable, FinalLowerable
 from functools import reduce
 import operator
diff --git a/src/pairs/transformations/__init__.py b/src/pairs/transformations/__init__.py
index e788f35b0d257894551c09e149b9c9806cc71d49..469133cc334c6df99e0247e4f9728ab23592f705 100644
--- a/src/pairs/transformations/__init__.py
+++ b/src/pairs/transformations/__init__.py
@@ -2,7 +2,7 @@ import time
 from pairs.analysis import Analysis
 from pairs.transformations.blocks import LiftDeclarations, MergeAdjacentBlocks
 from pairs.transformations.devices import AddDeviceCopies, AddDeviceKernels, AddHostReferencesToModules, AddDeviceReferencesToModules
-from pairs.transformations.expressions import ReplaceSymbols, LowerNeighborIndexes, SimplifyExpressions, PruneUnusedVectorIndexes, AddExpressionDeclarations
+from pairs.transformations.expressions import ReplaceSymbols, LowerNeighborIndexes, ConstantPropagation, SimplifyExpressions, PruneUnusedVectorIndexes, AddExpressionDeclarations
 from pairs.transformations.instrumentation import AddModulesInstrumentation
 from pairs.transformations.loops import LICM
 from pairs.transformations.lower import Lower
@@ -47,6 +47,7 @@ class Transformations:
         self.apply(PruneUnusedVectorIndexes())
         self.apply(LowerNeighborIndexes())
         self.apply(ReplaceSymbols())
+        self.apply(ConstantPropagation())
         self.apply(SimplifyExpressions())
 
     def lift_declarations_to_owner_blocks(self):
diff --git a/src/pairs/transformations/expressions.py b/src/pairs/transformations/expressions.py
index bd85bf81c65608dd40bb5ced66a360fdad42720e..5565dcc9891fd39a2d2e714fa563b2b4092dbc07 100644
--- a/src/pairs/transformations/expressions.py
+++ b/src/pairs/transformations/expressions.py
@@ -5,6 +5,51 @@ from pairs.ir.operators import Operators
 from pairs.ir.types import Types
 
 
+class ConstantPropagation(Mutator):
+    def __init__(self, ast=None):
+        super().__init__(ast)
+
+    def mutate_ScalarOp(self, ast_node):
+        sim = ast_node.lhs.sim
+        ast_node.lhs = self.mutate(ast_node.lhs)
+        if not ast_node.operator().is_unary():
+            ast_node.rhs = self.mutate(ast_node.rhs)
+
+        if (not ast_node.operator().is_unary() and
+            isinstance(ast_node.lhs, Lit) and isinstance(ast_node.rhs, Lit)):
+
+            if ast_node.op == Operators.Add:
+                return Lit(sim, ast_node.lhs.value + ast_node.rhs.value)
+
+            if ast_node.op == Operators.Sub:
+                return Lit(sim, ast_node.lhs.value - ast_node.rhs.value)
+
+            if ast_node.op == Operators.Mul:
+                return Lit(sim, ast_node.lhs.value * ast_node.rhs.value)
+
+            if ast_node.op == Operators.Div:
+                return Lit(sim, ast_node.lhs.value / ast_node.rhs.value)
+            
+            if ast_node.op == Operators.Gt:
+                return Lit(sim, 1) if Lit(sim, ast_node.lhs.value > ast_node.rhs.value) else Lit(sim, 0)
+            
+            if ast_node.op == Operators.Lt:
+                return Lit(sim, 1) if Lit(sim, ast_node.lhs.value < ast_node.rhs.value) else Lit(sim, 0)
+            
+            if ast_node.op == Operators.Geq:
+                return Lit(sim, 1) if Lit(sim, ast_node.lhs.value >= ast_node.rhs.value) else Lit(sim, 0)
+            
+            if ast_node.op == Operators.Leq:
+                return Lit(sim, 1) if Lit(sim, ast_node.lhs.value <= ast_node.rhs.value) else Lit(sim, 0)
+            
+            if ast_node.op == Operators.Eq:
+                return Lit(sim, 1) if Lit(sim, ast_node.lhs.value == ast_node.rhs.value) else Lit(sim, 0)
+            
+            if ast_node.op == Operators.Neq:
+                return Lit(sim, 1) if Lit(sim, ast_node.lhs.value != ast_node.rhs.value) else Lit(sim, 0)
+            
+        return ast_node
+    
 class ReplaceSymbols(Mutator):
     def __init__(self, ast=None):
         super().__init__(ast)
diff --git a/src/pairs/transformations/modules.py b/src/pairs/transformations/modules.py
index 1ee2c9b15f09caa2e51dafc02cdf3405f7722e5c..8fed95e2656d9b6e13bdcf07255a5e7dfffde267 100644
--- a/src/pairs/transformations/modules.py
+++ b/src/pairs/transformations/modules.py
@@ -9,7 +9,7 @@ from pairs.ir.module import Module, ModuleCall
 from pairs.ir.mutator import Mutator
 from pairs.ir.properties import ReallocProperty
 from pairs.ir.types import Types
-from pairs.ir.utils import Print
+from pairs.ir.print import Print
 from pairs.ir.variables import Var, Deref
 from functools import reduce
 import operator