diff --git a/CMakeLists.txt b/CMakeLists.txt
index 32f6d0b872bec9f1824d81eda3021796ebc0d457..7d611bd77b955cd2a3d37dc2f9fdbe5a70e9a22f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,5 @@
 cmake_minimum_required(VERSION 3.8.2 FATAL_ERROR)
+# cmake_policy(SET CMP0074 NEW)
 project(pairs CXX)
 
 set(TESTCASE ${TESTCASE} CACHE STRING "Select the testcase from the following: md, dem")
@@ -30,6 +31,7 @@ if(DEBUG)
 endif()
 
 if(USE_WALBERLA)
+    # SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_WALBERLA_LOAD_BALANCING -DWALBERLA_BUILD_WITH_CUDA ")
     SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_WALBERLA_LOAD_BALANCING ")
     find_package(waLBerla REQUIRED)
     add_subdirectory(${walberla_SOURCE_DIR} ${walberla_BINARY_DIR} EXCLUDE_FROM_ALL)
@@ -47,6 +49,7 @@ set(RUNTIME_COMMON_FILES
     runtime/pairs.cpp
     runtime/boundary_weights.cpp
     runtime/copper_fcc_lattice.cpp
+    runtime/create_shape.cpp
     runtime/dem_sc_grid.cpp
     runtime/read_from_file.cpp
     runtime/stats.cpp
@@ -116,10 +119,12 @@ if(COMPILE_CUDA)
     enable_language(CUDA)
 
     if(USE_WALBERLA)
+        # set(WALBERLA_BUILD_WITH_CUDA ON CACHE BOOL "Enable CUDA in waLBerla")
         waLBerla_add_executable(
             NAME ${GPU_BIN}
             FILES ${EXEC_FILES}
             DEPENDS blockforest core pe)
+            # DEPENDS blockforest core pe gpu)
     else()
         add_executable(${GPU_BIN} ${GPU_SRC} ${EXEC_FILES} ${RUNTIME_GPU_FILES})
     endif()
diff --git a/examples/dem_sd.py b/examples/dem_sd.py
index 737da4b562249d089628bdf12926d833396f7415..740a6a0306308a65d14bf1b3dd42b11cbf82848b 100644
--- a/examples/dem_sd.py
+++ b/examples/dem_sd.py
@@ -4,7 +4,7 @@ import sys
 
 def update_mass_and_inertia(i):
     rotation_matrix[i] = diagonal_matrix(1.0)
-    rotation_quat[i] = default_quaternion()
+    rotation[i] = default_quaternion()
 
     if is_sphere(i):
         inv_inertia[i] = inversed(diagonal_matrix(0.4 * mass[i] * radius[i] * radius[i]))
@@ -48,8 +48,8 @@ def euler(i):
     linear_velocity[i] += inv_mass * force[i] * dt
     wdot = rotation_matrix[i] * (inv_inertia[i] * torque[i]) * transposed(rotation_matrix[i])
     phi = angular_velocity[i] * dt + 0.5 * wdot * dt * dt
-    rotation_quat[i] = quaternion(phi, length(phi)) * rotation_quat[i]
-    rotation_matrix[i] = quaternion_to_rotation_matrix(rotation_quat[i])
+    rotation[i] = quaternion(phi, length(phi)) * rotation[i]
+    rotation_matrix[i] = quaternion_to_rotation_matrix(rotation[i])
     angular_velocity[i] += wdot * dt
 
 
@@ -119,7 +119,7 @@ psim.add_property('radius', pairs.real(), 1.0)
 psim.add_property('normal', pairs.vector())
 psim.add_property('inv_inertia', pairs.matrix())
 psim.add_property('rotation_matrix', pairs.matrix())
-psim.add_property('rotation_quat', pairs.quaternion())
+psim.add_property('rotation', pairs.quaternion())
 psim.add_feature('type', ntypes)
 psim.add_feature_property('type', 'friction_static', pairs.real(), [frictionStatic for i in range(ntypes * ntypes)])
 psim.add_feature_property('type', 'friction_dynamic', pairs.real(), [frictionDynamic for i in range(ntypes * ntypes)])
@@ -128,9 +128,9 @@ psim.set_domain([0.0, 0.0, 0.0, domainSize_SI[0], domainSize_SI[1], domainSize_S
 # psim.set_domain_partitioner(pairs.block_forest(), initDomainFromWalberla=True)
 psim.set_domain_partitioner(pairs.block_forest())
 # psim.set_domain_partitioner(pairs.regular_domain_partitioner_xy())
-psim.pbc([True, True, False])
+psim.pbc([False, False, False])
 psim.dem_sc_grid(
-    domainSize_SI[0], domainSize_SI[1], domainSize_SI[2], generationSpacing_SI,
+    domainSize_SI[0], domainSize_SI[1], domainSize_SI[2]/2, generationSpacing_SI,
     diameter_SI, minDiameter_SI, maxDiameter_SI, initialVelocity_SI, densityParticle_SI, ntypes)
 
 #psim.read_particle_data(
@@ -146,10 +146,10 @@ psim.dem_sc_grid(
 #    ['type', 'mass', 'radius', 'position', 'linear_velocity', 'flags'],
 #    pairs.sphere())
 
-psim.read_particle_data(
-    "data/planes.input",
-    ['uid', 'type', 'mass', 'position', 'normal', 'flags'],
-    pairs.halfspace())
+# psim.read_particle_data(
+#     "data/planes.input",
+#     ['uid', 'type', 'mass', 'position', 'normal', 'flags'],
+#     pairs.halfspace())
 
 psim.setup(update_mass_and_inertia, {'densityParticle_SI': densityParticle_SI,
                                      'pi': math.pi,
diff --git a/examples/main.cpp b/examples/main.cpp
index 927cb50e2a43b43cbdc6aa0bb1bf9c34de5bf61a..c2647ded24e82859012723b7985f14df18359edc 100644
--- a/examples/main.cpp
+++ b/examples/main.cpp
@@ -3,7 +3,8 @@
 #include "dem_sd.hpp"
 
 int main(int argc, char **argv) {
-    PairsSimulation *ps = new PairsSimulation();
+    auto pairs_sim = std::make_shared<PairsSimulation>();
+    auto pairs_acc = std::make_shared<PairsAccessor>(pairs_sim);
 
     // Create forest (make sure to use_domain(forest)) ----------------------------------------------
     walberla::math::AABB domain(0, 0, 0, 0.1, 0.1, 0.1);
@@ -14,24 +15,44 @@ int main(int argc, char **argv) {
     auto block_config = walberla::Vector3<int>(2, 2, 1);
     auto ref_level = 0;
     std::shared_ptr<walberla::BlockForest> forest = walberla::blockforest::createBlockForest(
-            domain, block_config, walberla::Vector3<bool>(true, true, false), procs, ref_level);
+            domain, block_config, walberla::Vector3<bool>(false, false, false), procs, ref_level);
     //-----------------------------------------------------------------------------------------------
 
     // initialize pairs data structures ----------------------------------------------
-    ps->initialize();
+    pairs_sim->initialize();
 
     // either create new domain or use an existing one ----------------------------------------
-    // ps->create_domain(argc, argv);
-    ps->use_domain(forest);
+    // pairs_sim->create_domain(argc, argv);
+    pairs_sim->use_domain(forest);
+
+    // create planes and particles ------------------------------------------------------------
+    pairs_sim->create_halfspace(0,     0,      0,      1, 0, 0,        0, 13);
+    pairs_sim->create_halfspace(0,     0,      0,      0, 1, 0,        0, 13);
+    pairs_sim->create_halfspace(0,     0,      0,      0, 0, 1,        0, 13);
+    pairs_sim->create_halfspace(0.1,   0.1,    0.1,    -1, 0, 0,       0, 13);
+    pairs_sim->create_halfspace(0.1,   0.1,    0.1,    0, -1, 0,       0, 13);
+    pairs_sim->create_halfspace(0.1,   0.1,    0.1,    0, 0, -1,       0, 13);
+
+    pairs_sim->create_particle(0.03,   0.03,   0.08,   0.5, 0.5, 0 ,   1000, 0.0045, 1, 0);     
+    pairs_sim->create_particle(0.07,   0.07,   0.08,   -0.5, -0.5, 0 , 1000, 0.0045, 0, 0);
+
+    // TODO: make sure linkedCellWidth is larger than max diameter in the system
 
     // setup particles, setup functions, and the cell list stencil-------------------------------
-    ps->setup_sim();
+    pairs_sim->setup_sim();
+
+    for(int i=0; i<pairs_acc->size(); ++i){
+        if (pairs_acc->getType(i) == 1){
+            pairs_acc->setPosition(i, walberla::Vector3<double>(0.01, 0.01, 0.08));
+            pairs_acc->setLinearVelocity(i, walberla::Vector3<double>(0.5, 0.5, 0.5));
+        }
+    }
 
     for (int i=0; i<10000; ++i){
-        ps->do_timestep(i);
+        pairs_sim->do_timestep(i);
     }
 
-    ps->end();
+    pairs_sim->end();
 
     return 0;
 }
diff --git a/runtime/boundary_weights.cpp b/runtime/boundary_weights.cpp
index 065c4cb30d324878765ee2039bdaa69cb1db411c..f2c84f8a10869b87e99794a0eaca61b6283c05b7 100644
--- a/runtime/boundary_weights.cpp
+++ b/runtime/boundary_weights.cpp
@@ -126,7 +126,7 @@ void compute_boundary_weights(
     #else
     real_t *position_ptr = static_cast<real_t *>(position_prop.getDevicePointer());
 
-    ps->copyPropertyToDevice(position_prop, ReadOnly);
+    ps->copyPropertyToDevice(position_prop.getId(), ReadOnly);
 
     *comp_weight = cuda_compute_boundary_weights(
         position_ptr, 0, nlocal, particle_capacity, xmin, xmax, ymin, ymax, zmin, zmax);
diff --git a/runtime/copper_fcc_lattice.cpp b/runtime/copper_fcc_lattice.cpp
index 76c729c9e88830ffcf0274ea6fa434a8033b6d6b..901a37fa1dff6d2916f7129a3eab770005e0e100 100644
--- a/runtime/copper_fcc_lattice.cpp
+++ b/runtime/copper_fcc_lattice.cpp
@@ -64,7 +64,7 @@ double copper_fcc_lattice(
     double xlo = 0.0, xhi = xprd;
     double ylo = 0.0, yhi = yprd;
     double zlo = 0.0, zhi = zprd;
-    int natoms = 0;
+    int natoms = ps->getTrackedVariableAsInteger("nlocal");
     //int natoms_expected = 4 * nx * ny * nz;
 
     double alat = pow((4.0 / rho), (1.0 / 3.0));
diff --git a/runtime/create_shape.cpp b/runtime/create_shape.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d4480f5a9b59e86c953432a210a9471fe95f6ae8
--- /dev/null
+++ b/runtime/create_shape.cpp
@@ -0,0 +1,67 @@
+#include "create_shape.hpp"
+
+namespace pairs {
+
+void create_halfspace(PairsRuntime *pr, 
+                    double x, double y, double z, 
+                    double nx, double ny, double nz, 
+                    int type, int flag){
+    // TODO: ensure unique id in all functions that create particle or read particle from file
+    // TODO: increase capacity if exceeded
+    // auto uids = pr->getAsIntegerProperty(pr->getPropertyByName("uid"));   
+    auto shape = pr->getAsIntegerProperty(pr->getPropertyByName("shape"));
+    auto types = pr->getAsIntegerProperty(pr->getPropertyByName("type"));
+    auto flags = pr->getAsIntegerProperty(pr->getPropertyByName("flags"));
+    auto positions = pr->getAsVectorProperty(pr->getPropertyByName("position"));
+    auto normals = pr->getAsVectorProperty(pr->getPropertyByName("normal"));
+
+    if(pr->getDomainPartitioner()->isWithinSubdomain(x, y, z) || flag & (FLAGS_INFINITE | FLAGS_FIXED | FLAGS_GLOBAL) ){
+        int n = pr->getTrackedVariableAsInteger("nlocal");
+        // uids(n) = ;
+        positions(n, 0) = x;
+        positions(n, 1) = y;
+        positions(n, 2) = z;
+        normals(n, 0) = nx;
+        normals(n, 1) = ny;
+        normals(n, 2) = nz;
+        types(n) = type;
+        flags(n) = flag;
+        shape(n) = 1;   // halfspace
+        pr->setTrackedVariableAsInteger("nlocal", n + 1);
+    }
+}
+
+void create_particle(PairsRuntime *pr, 
+                    double x, double y, double z, 
+                    double vx, double vy, double vz, 
+                    double density, double radius, int type, int flag){
+    // TODO: ensure unique id in all functions that create particle or read particle from file
+    // TODO: increase capacity if exceeded
+    // auto uids = pr->getAsIntegerProperty(pr->getPropertyByName("uid"));   
+    auto shape = pr->getAsIntegerProperty(pr->getPropertyByName("shape"));
+    auto types = pr->getAsIntegerProperty(pr->getPropertyByName("type"));
+    auto flags = pr->getAsIntegerProperty(pr->getPropertyByName("flags"));
+    auto masses = pr->getAsFloatProperty(pr->getPropertyByName("mass"));
+    auto radii = pr->getAsFloatProperty(pr->getPropertyByName("radius"));
+    auto positions = pr->getAsVectorProperty(pr->getPropertyByName("position"));
+    auto velocities = pr->getAsVectorProperty(pr->getPropertyByName("linear_velocity"));
+
+    if(pr->getDomainPartitioner()->isWithinSubdomain(x, y, z)) {
+        int n = pr->getTrackedVariableAsInteger("nlocal");
+        // uids(n) = ;
+        radii(n) = radius;
+        masses(n) = ((4.0 / 3.0) * M_PI) * radius * radius * radius * density;
+        positions(n, 0) = x;
+        positions(n, 1) = y;
+        positions(n, 2) = z;
+        velocities(n, 0) = vx;
+        velocities(n, 1) = vy;
+        velocities(n, 2) = vz;
+        types(n) = type;
+        flags(n) = flag;
+        shape(n) = 0;   // sphere
+        pr->setTrackedVariableAsInteger("nlocal", n + 1);
+    }
+}
+
+}
\ No newline at end of file
diff --git a/runtime/create_shape.hpp b/runtime/create_shape.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..0a6f3c3173b8e197a78dbe5225a329748f9bc69e
--- /dev/null
+++ b/runtime/create_shape.hpp
@@ -0,0 +1,17 @@
+#include "pairs.hpp"
+
+#pragma once
+
+namespace pairs {
+
+void create_halfspace(PairsRuntime *pr, 
+                    double x, double y, double z, 
+                    double nx, double ny, double nz, 
+                    int type, int flag);
+
+void create_particle(PairsRuntime *pr, 
+                    double x, double y, double z, 
+                    double vx, double vy, double vz, 
+                    double density, double radius, int type, int flag);
+
+}
\ No newline at end of file
diff --git a/runtime/dem_sc_grid.cpp b/runtime/dem_sc_grid.cpp
index 66aa5ac59a0803b0a8a2294160ec034dce48cc5a..3ceb258296042f543637051677334522f39f5698 100644
--- a/runtime/dem_sc_grid.cpp
+++ b/runtime/dem_sc_grid.cpp
@@ -35,7 +35,7 @@ int dem_sc_grid(PairsRuntime *ps, double xmax, double ymax, double zmax, double
     auto positions = ps->getAsVectorProperty(ps->getPropertyByName("position"));
     auto velocities = ps->getAsVectorProperty(ps->getPropertyByName("linear_velocity"));
     int last_uid = 1;
-    int nparticles = 0;
+    int nparticles = ps->getTrackedVariableAsInteger("nlocal");
 
     const double xmin = 0.0;
     const double ymin = 0.0;
diff --git a/runtime/domain/block_forest.hpp b/runtime/domain/block_forest.hpp
index c2ee68fee77b606acd0373683161e6f66974c0f8..ee4188ac168db9a0ec8170677eed258b9592dcd6 100644
--- a/runtime/domain/block_forest.hpp
+++ b/runtime/domain/block_forest.hpp
@@ -44,7 +44,7 @@ public:
         subdom = new real_t[ndims * 2];
     }
 
-    BlockForest(PairsRuntime *ps_, std::shared_ptr<walberla::BlockForest> bf) :
+    BlockForest(PairsRuntime *ps_, const std::shared_ptr<walberla::BlockForest> &bf) :
         forest(bf),
         DomainPartitioner(bf->getDomain().xMin(), bf->getDomain().xMax(),
                         bf->getDomain().yMin(), bf->getDomain().yMax(),
diff --git a/runtime/pairs.hpp b/runtime/pairs.hpp
index 0c9f55e4110f19731aa6df9a70744950efc98746..3dbd5e12c1b0f2ec8f8c1bdef1ddc8e5de732310 100644
--- a/runtime/pairs.hpp
+++ b/runtime/pairs.hpp
@@ -306,7 +306,7 @@ public:
         bool pbcx = 0, bool pbcy = 0, bool pbcz = 0);
 
     template<typename Domain_T>
-    void useDomain(std::shared_ptr<Domain_T> domain_ptr);
+    void useDomain(const std::shared_ptr<Domain_T> &domain_ptr);
 
     void updateDomain() { dom_part->update(); }
 
@@ -345,7 +345,7 @@ public:
 };
 
 template<typename Domain_T>
-void PairsRuntime::useDomain(std::shared_ptr<Domain_T> domain_ptr){
+void PairsRuntime::useDomain(const std::shared_ptr<Domain_T> &domain_ptr){
     
     if(dom_part){ 
         PAIRS_ERROR("DomainPartitioner already exists!\n"); 
diff --git a/runtime/runtime_var.hpp b/runtime/runtime_var.hpp
index 7cf3aeaa9b6c32883299bfcb44044db457f4af48..c33bef33bbc2725af07b2b8426ed94a7adad7255 100644
--- a/runtime/runtime_var.hpp
+++ b/runtime/runtime_var.hpp
@@ -10,6 +10,7 @@ protected:
     T *h_ptr, *d_ptr;
 
 public:
+    RuntimeVar() = default;
     RuntimeVar(T *ptr) {
         h_ptr = ptr;
         d_ptr = (T *) pairs::device_alloc(sizeof(T));
diff --git a/runtime/vtk.cpp b/runtime/vtk.cpp
index cc47050082308d6171ba57df652dfdf40408af78..801b0e6ca5dcca7a30d02c24ede63f91193b4689 100644
--- a/runtime/vtk.cpp
+++ b/runtime/vtk.cpp
@@ -56,10 +56,8 @@ void vtk_write_data(
 
         out_file << "\n\n";
         out_file << "CELLS " << n << " " << (n * 2) << "\n";
-        for(int i = start; i < end; i++) {
-            if(!(flags(i) & FLAGS_INFINITE)) {
-                out_file << "1 " << (i - start) << "\n";
-            }
+        for(int i = 0; i < n; i++) {
+            out_file << "1 " << i << "\n";
         }
 
         out_file << "\n\n";
diff --git a/src/pairs/code_gen/cgen.py b/src/pairs/code_gen/cgen.py
index d4a9d757850a74e01a9d488d12b1284c3bfa7754..844ea83d6b610c0e5270413a561aa5470c541175 100644
--- a/src/pairs/code_gen/cgen.py
+++ b/src/pairs/code_gen/cgen.py
@@ -59,19 +59,29 @@ class CGen:
         if device and (not self.target.is_gpu() or not obj.device_flag):
             # Ideally this should never be called
             return "nullptr"
-
+        
         name = obj.name() if not device else f"d_{obj.name()}"
         t = obj.type()
-        is_temp_var = isinstance(obj, Var) and obj.temporary()
-
         if not Types.is_scalar(t) and index is not None:
             name += f"_{index}"
 
-        if self.generate_full_object_names and not is_temp_var:
-            return f"pobj->{name}"
+        if isinstance(obj, Var):
+            if self.generate_full_object_names:
+                if not obj.temporary():
+                    if obj.device_flag and self.target.is_gpu() and device:
+                        return f"pobj->rv_{obj.name()}"
+                    else:
+                        return f"pobj->{name}"
+            return name
 
+        if isinstance(obj, FeatureProperty) and device and obj.device_flag:
+            return name
+
+        if self.generate_full_object_names:
+            return f"pobj->{name}"
         else:
             return name
+        
 
     def generate_object_address(self, obj, device=False, index=None):
         if device and (not self.target.is_gpu() or not obj.device_flag):
@@ -137,6 +147,7 @@ class CGen:
         self.print("//---")
         self.print("#include \"runtime/likwid-marker.h\"")
         self.print("#include \"runtime/copper_fcc_lattice.hpp\"")
+        self.print("#include \"runtime/create_shape.hpp\"")
         self.print("#include \"runtime/dem_sc_grid.hpp\"")
         self.print("#include \"runtime/pairs.hpp\"")
         self.print("#include \"runtime/read_from_file.hpp\"")
@@ -174,6 +185,63 @@ class CGen:
 
         self.print("")
 
+    def generate_host_pairs_accessor_class(self):
+        self.print("#include <core/math/Vector3.h>")
+        self.print("#include <core/math/Quaternion.h>")
+        self.print("#include <core/math/Matrix3.h>")
+        self.print("")
+        self.print("class PairsAccessor {")
+        self.print("private:")
+        self.print("    std::shared_ptr<PairsSimulation> ps;")
+        self.print("public:")
+        self.print.add_indent(4)
+        self.print("PairsAccessor(const std::shared_ptr<PairsSimulation> &ps_): ps(ps_){}")
+        self.print("")
+        self.print("int size() const { return ps->pobj->nlocal; }")
+        self.print("")
+
+        for p in self.sim.properties:
+            ptr = p.name()
+
+            tkw = Types.walberla_keyword(self.sim, p.type())
+
+            splitname = ptr.split('_')
+            funcname = ''.join(word.capitalize() for word in splitname)
+            v = f"ps->pobj->{ptr}"
+            getSignature = f"{tkw} get{funcname}(const size_t i) const"
+            setSignature = f"void set{funcname}(const size_t i, {tkw} const &value)"
+
+            if Types.is_scalar(p.type()):
+                self.print(f"{getSignature} {{return {v}[i];}}")
+                self.print(f"{setSignature} {{{v}[i] = value;}}")
+                self.print("")
+
+            elif p.type()==Types.Vector:
+                self.print(f"{getSignature} {{return {tkw}({v}[i*3 + 0], {v}[i*3 + 1], {v}[i*3 + 2]);}}")
+                self.print(f"{setSignature} {{{v}[i*3 + 0] = value[0]; {v}[i*3 + 1] = value[1]; {v}[i*3 + 2] = value[2];}}")
+                self.print("")
+
+            elif p.type()==Types.Quaternion:
+                self.print(f"{getSignature} {{return {tkw}({v}[i*4 + 0], {v}[i*4 + 1], {v}[i*4 + 2], {v}[i*4 + 3]);}}")
+                self.print(f"{setSignature} {{{v}[i*4 + 0] = value[0]; {v}[i*4 + 1] = value[1]; {v}[i*4 + 2] = value[2]; {v}[i*4 + 3] = value[3];}}")
+                self.print("")
+
+            elif p.type()==Types.Matrix:
+                self.print(f"{getSignature} {{" \
+                           f"return {tkw}({v}[i*9 + 0], {v}[i*9 + 1], {v}[i*9 + 2], "\
+                                        f"{v}[i*9 + 3], {v}[i*9 + 4], {v}[i*9 + 5], "\
+                                        f"{v}[i*9 + 6], {v}[i*9 + 7], {v}[i*9 + 8]);}}")
+                self.print(f"{setSignature} {{" \
+                           f"{v}[i*9 + 0] = value[0]; {v}[i*9 + 1] = value[1]; {v}[i*9 + 2] = value[2]; "\
+                            f"{v}[i*9 + 3] = value[3]; {v}[i*9 + 4] = value[4]; {v}[i*9 + 5] = value[5]; "\
+                            f"{v}[i*9 + 6] = value[6]; {v}[i*9 + 7] = value[7]; {v}[i*9 + 8] = value[8];}}")
+                self.print("")
+            
+            
+        self.print.add_indent(-4)
+        self.print("};")
+        self.print("")
+
     def generate_pairs_object_structure(self):
         self.print("")
         self.print("struct PairsObjects {")
@@ -285,52 +353,69 @@ class CGen:
         self.print("private:")
         self.print("    PairsRuntime *pairs_runtime;")
         self.print("    struct PairsObjects *pobj;")
+        self.print("    friend class PairsAccessor;")
         self.print("public:")
 
-        self.print("    void initialize() {")
-        self.print(f"        pairs_runtime = new PairsRuntime({nprops}, {ncontactprops}, {narrays}, {part});")
-        self.print(f"        pobj = new PairsObjects();")
+        self.print.add_indent(4)
+
+        self.print("void initialize() {")
+        self.print(f"    pairs_runtime = new PairsRuntime({nprops}, {ncontactprops}, {narrays}, {part});")
+        self.print(f"    pobj = new PairsObjects();")
         self.print.add_indent(4)
         self.generate_statement(initialize_module.block)
         self.print.add_indent(-4)
-        self.print("    }")
+        self.print("}")
         self.print("")
 
-        self.print("    void create_domain(int argc, char **argv) {")
+        self.print("void create_domain(int argc, char **argv) {")
         self.print.add_indent(4)
         self.generate_statement(create_domain_module.block)
         self.print.add_indent(-4)
-        self.print("    }")
+        self.print("}")
         self.print("")
 
-        self.print("    template<typename Domain_T>")
-        self.print("    void use_domain(std::shared_ptr<Domain_T> domain_ptr) {")
-        self.print("        pairs_runtime->useDomain(domain_ptr);")
-        self.print("    }")
+        self.print("template<typename Domain_T>")
+        self.print("void use_domain(const std::shared_ptr<Domain_T> &domain_ptr) {")
+        self.print("    pairs_runtime->useDomain(domain_ptr);")
+        self.print("}")
         self.print("")
 
-        self.print("    void setup_sim() {")
+        self.print("void create_halfspace(double x, double y, double z, double nx, double ny, double nz, int type, int flag){")
+        self.print("    pairs::create_halfspace(pairs_runtime, x, y, z, nx, ny, nz, type, flag);")
+        self.print("}")
+        self.print("")
+
+        self.print("void create_particle(double x, double y, double z, double vx, double vy, double vz, double density, double radius, int type, int flag){")
+        self.print("    pairs::create_particle(pairs_runtime, x, y, z, vx, vy, vz, density, radius, type, flag);")
+        self.print("}")
+        self.print("")
+
+        self.print("void setup_sim() {")
         self.print.add_indent(4)
         self.generate_statement(setup_sim_module.block)
         self.print.add_indent(-4)
-        self.print("    }")
+        self.print("}")
         self.print("")
 
-        self.print("    void do_timestep(int timestep) {")
-        self.print("        pobj->sim_timestep = timestep;")
+        self.print("void do_timestep(int timestep) {")
+        self.print("    pobj->sim_timestep = timestep;")
         self.print.add_indent(4)
         self.generate_statement(do_timestep_module.block)
         self.print.add_indent(-4)
-        self.print("    }")
+        self.print("}")
         self.print("")
 
-        self.print("    void end() {")
-        self.print("        pairs::print_timers(pairs_runtime);")
-        self.print("        pairs::print_stats(pairs_runtime, pobj->nlocal, pobj->nghost);")
-        self.print("        delete pobj;")
-        self.print("        delete pairs_runtime;")
-        self.print("    }")
+        self.print("void end() {")
+        self.print("    pairs::print_timers(pairs_runtime);")
+        self.print("    pairs::print_stats(pairs_runtime, pobj->nlocal, pobj->nghost);")
+        self.print("    delete pobj;")
+        self.print("    delete pairs_runtime;")
+        self.print("}")
+
+        self.print.add_indent(-4)
         self.print("};")
+
+        self.generate_host_pairs_accessor_class()
         
         self.print.end()
         self.generate_full_object_names = False
@@ -367,6 +452,7 @@ class CGen:
         else:
             self.print(f"void {module.name}(struct PairsObjects *pobj) {{")
             self.print.add_indent(4)
+            device_cond = module.run_on_device and self.target.is_gpu()
 
             if self.debug:
                 self.print(f"PAIRS_DEBUG(\"{module.name}\\n\");")
@@ -377,32 +463,37 @@ class CGen:
 
             for var in module.write_variables():
                 type_kw = Types.c_keyword(self.sim, var.type())
-                self.print(f"{type_kw} *{var.name()} = &(pobj->{var.name()});")
+                name = f"rv_{var.name()}.getDevicePointer()" if device_cond and var.device_flag else var.name()
+                self.print(f"{type_kw} *{var.name()} = &(pobj->{name});")
 
             for array in module.arrays():
                 type_kw = Types.c_keyword(self.sim, array.type())
-                self.print(f"{type_kw} *{array.name()} = pobj->{array.name()};")
+                name = array.name() if not device_cond else f"d_{array.name()}"
+                self.print(f"{type_kw} *{array.name()} = pobj->{name};")
 
                 if array in module.host_references():
                     self.print(f"{type_kw} *h_{array.name()} = pobj->{array.name()};")
 
             for prop in module.properties():
                 type_kw = Types.c_keyword(self.sim, prop.type())
-                self.print(f"{type_kw} *{prop.name()} = pobj->{prop.name()};")
+                name = prop.name() if not device_cond else f"d_{prop.name()}"
+                self.print(f"{type_kw} *{prop.name()} = pobj->{name};")
 
                 if prop in module.host_references():
                     self.print(f"{type_kw} *h_{prop.name()} = pobj->{prop.name()};")
 
             for contact_prop in module.contact_properties():
                 type_kw = Types.c_keyword(self.sim, contact_prop.type())
-                self.print(f"{type_kw} *{contact_prop.name()} = pobj->{contact_prop.name()};")
+                name = contact_prop.name() if not device_cond else f"d_{contact_prop.name()}"
+                self.print(f"{type_kw} *{contact_prop.name()} = pobj->{name};")
 
                 if contact_prop in module.host_references():
                     self.print(f"{type_kw} *h_{contact_prop.name()} = pobj->{contact_prop.name()};")
 
             for feature_prop in module.feature_properties():
                 type_kw = Types.c_keyword(self.sim, feature_prop.type())
-                self.print(f"{type_kw} *{feature_prop.name()} = pobj->{feature_prop.name()};")
+                name = feature_prop.name() if not device_cond else f"d_{feature_prop.name()}"
+                self.print(f"{type_kw} *{feature_prop.name()} = pobj->{name};")
 
                 if feature_prop in module.host_references():
                     self.print(f"{type_kw} *h_{feature_prop.name()} = pobj->{feature_prop.name()};")
@@ -719,7 +810,8 @@ class CGen:
         if isinstance(ast_node, CopyVar):
             var_name = ast_node.variable().name()
             ctx_suffix = "Device" if ast_node.context() == Contexts.Device else "Host"
-            self.print(f"rv_{var_name}.copyTo{ctx_suffix}();")
+            ref = self.generate_object_reference(ast_node.variable(), device=True)
+            self.print(f"{ref}.copyTo{ctx_suffix}();")
 
         if isinstance(ast_node, For):
             iterator = self.generate_expression(ast_node.iterator)
@@ -918,7 +1010,8 @@ class CGen:
 
             if not self.kernel_context and self.target.is_gpu() and ast_node.var.device_flag:
                 addr = self.generate_object_address(ast_node.var)
-                self.print(f"rv_{var_name} = pairs_runtime->addDeviceVariable({addr});")
+                ref = self.generate_object_reference(ast_node.var, device=True)
+                self.print(f"{prefix_decl}{ref} = pairs_runtime->addDeviceVariable({addr});")
 
         if isinstance(ast_node, While):
             cond = self.generate_expression(ast_node.cond)
diff --git a/src/pairs/ir/types.py b/src/pairs/ir/types.py
index ea1d40c784552d9f8bd922210e282f5a5904f400..fcd8f1b22d4b981b0beeb5440cd3dfd89f0742a2 100644
--- a/src/pairs/ir/types.py
+++ b/src/pairs/ir/types.py
@@ -13,6 +13,22 @@ class Types:
     Matrix = 10
     Quaternion = 11
 
+    def walberla_keyword(sim, t):
+        real_kw = 'double' if sim.use_double_precision() else 'float'
+        return (
+            real_kw if t==Types.Real
+            else f'walberla::math::Vector3<{real_kw}>' if t==Types.Vector
+            else f'walberla::math::Matrix3<{real_kw}>' if t==Types.Matrix
+            else f'walberla::math::Quaternion<{real_kw}>' if t==Types.Quaternion
+            else 'float' if t == Types.Float
+            else 'double' if t == Types.Double
+            else 'int' if t == Types.Int32
+            else 'long long int' if t == Types.Int64
+            else 'unsigned long long int' if t == Types.UInt64
+            else 'bool' if t == Types.Boolean
+            else '<invalid type>'
+        )
+
     def c_keyword(sim, t):
         real_kw = 'double' if sim.use_double_precision() else 'float'
         return (