diff --git a/apps/showcases/PorousMediaGPU/GridGeneration.h b/apps/showcases/PorousMediaGPU/GridGeneration.h
index 484cdd9e94e42db7a7403f7b1af667527c7d9b7e..53e67e02d48087498890dd61c7e71ccce8602e01 100644
--- a/apps/showcases/PorousMediaGPU/GridGeneration.h
+++ b/apps/showcases/PorousMediaGPU/GridGeneration.h
@@ -33,7 +33,7 @@
 #include <fstream>
 
 #include "Setup.h"
-#include "TPMS.h"
+// #include "TPMS.h"
 #include "Types.h"
 
 
@@ -62,8 +62,8 @@ void createSetupBlockForest(SetupBlockForest& setupBfs, const Setup& setup, cons
 
    if(useMPIManager) {numProcesses = uint_c(mpi::MPIManager::instance()->numProcesses());}
 
-   TPMSRefinementSelection tpmsRefinementSelection( setup );
-   setupBfs.addRefinementSelectionFunction(std::function<void(SetupBlockForest &)>(tpmsRefinementSelection));
+   // TPMSRefinementSelection tpmsRefinementSelection( setup );
+   // setupBfs.addRefinementSelectionFunction(std::function<void(SetupBlockForest &)>(tpmsRefinementSelection));
 
    // SphereBlockExclusion SphereBlockExclusion( Sphere );
    // setupBfs.addBlockExclusionFunction(SphereBlockExclusion);
diff --git a/apps/showcases/PorousMediaGPU/InitializerFunctions.cpp b/apps/showcases/PorousMediaGPU/InitializerFunctions.cpp
index 05c984a6179a2a45be07aaae492d5e69e8d0fd61..e94eab551caee207f679acdb1d3cd5d53508cf88 100644
--- a/apps/showcases/PorousMediaGPU/InitializerFunctions.cpp
+++ b/apps/showcases/PorousMediaGPU/InitializerFunctions.cpp
@@ -65,4 +65,77 @@ void initGrid(const shared_ptr< StructuredBlockStorage >& blocks, const BlockDat
    }
 }
 
+void initHemisphers(const shared_ptr< StructuredBlockStorage >& blocks, const BlockDataID flagFieldID, const Setup& setup,
+                    const Config::BlockHandle& wallParameters){
+
+
+   WALBERLA_LOG_INFO_ON_ROOT("Creating rough wall")
+
+   const real_t radius   = wallParameters.getParameter< real_t >("hemispheresRadius");
+   const real_t hemispheresDistance = wallParameters.getParameter< real_t >("hemispheresDistance");
+
+   const real_t diameter = real_c(2.0) * radius;
+   const real_t spaceX   = (math::root_three * std::sqrt(hemispheresDistance * hemispheresDistance) ) / real_c(2.0);
+
+   const real_t radiusSquared = radius * radius;
+   std::vector<AABB> aabbs;
+
+   auto domain = blocks->getDomain();
+
+   const real_t xMax = domain.xMax();
+   const real_t yMax = domain.yMax();
+   const real_t zMax = domain.zMax();
+   
+   const uint_t nx = uint_c(std::ceil(xMax / spaceX) + 1);
+   const uint_t nz = uint_c(std::ceil(zMax / hemispheresDistance) + 1);
+
+   for(uint_t i = 0; i < nx; ++i){
+      for(uint_t j = 0; j < nz; ++j){
+         const real_t xc = real_c(i) * spaceX;
+         real_t zc = real_c(j) * hemispheresDistance;
+         if(i % 2 != 0){
+            zc += diameter;
+         }
+
+         const Vector3<real_t> minCornerBottom(xc - radius, real_c(0.0), zc - radius);
+         const Vector3<real_t> maxCornerBottom(xc + radius, radius, zc + radius);
+         const Vector3<real_t> minCornerTop(xc - radius, yMax - radius, zc - radius);
+         const Vector3<real_t> maxCornerTop(xc + radius, yMax, zc + radius);
+
+         const AABB minaabb(minCornerBottom, maxCornerBottom);
+         const AABB maxaabb(minCornerTop, maxCornerTop);
+
+         aabbs.emplace_back(minaabb);
+         aabbs.emplace_back(maxaabb);
+      }
+   }
+
+   for (auto& block : *blocks){
+      const auto flagField    = block.getData< FlagField_T >(flagFieldID);
+      const auto boundaryFlag = flagField->getOrRegisterFlag(setup.obstacleUID);
+      
+      WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(flagField, Cell globalCell;
+         blocks->transformBlockLocalToGlobalCell(globalCell, block, Cell(x, y, z));
+         Vector3<real_t> p;
+         blocks->getCellCenter(p, globalCell, 0);
+
+         for(auto aabb: aabbs){
+            if (aabb.contains(p)){
+               if(hemispheresContains(p, aabb.center(), radiusSquared)) {
+                  
+                  addFlag(flagField->get(x, y, z), boundaryFlag);
+               }
+            }
+         }
+      )
+   }
+
+   
+}
+
+bool hemispheresContains(const Vector3<real_t>& point, const Vector3<real_t>& center, const real_t radiusSquared )
+{
+   return (point - center).sqrLength() <= radiusSquared;
+}
+
 } // namespace walberla
diff --git a/apps/showcases/PorousMediaGPU/InitializerFunctions.h b/apps/showcases/PorousMediaGPU/InitializerFunctions.h
index e0173352d9281a5cdf8a4491d936b5b4250db5ff..004441e68f690bb60a4f5286e91e7ac3ef2f95bd 100644
--- a/apps/showcases/PorousMediaGPU/InitializerFunctions.h
+++ b/apps/showcases/PorousMediaGPU/InitializerFunctions.h
@@ -30,4 +30,6 @@
 
 namespace walberla{
 void initGrid(const shared_ptr< StructuredBlockStorage >& blocks, const BlockDataID flagFieldID, const Setup& setup);
+void initHemisphers(const shared_ptr< StructuredBlockStorage >& blocks, const BlockDataID flagFieldID, const Setup& setup, const Config::BlockHandle& wallParameters);
+bool hemispheresContains(const Vector3<real_t>& point, const Vector3<real_t>& center, const real_t radiusSquared);
 } // namespace walberla
diff --git a/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp b/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp
index 8245b0eb69776e0e66188a3e48217e950239f7b6..3b91770a5db4eb61b6969eff0d885c49156f579d 100644
--- a/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp
+++ b/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp
@@ -129,7 +129,6 @@ int main(int argc, char** argv){
    auto boundariesConfig  = config->getBlock("Boundaries");
 
    Setup setup(config, infoMap);
-   TPMS tpms(setup);
    WALBERLA_LOG_INFO_ON_ROOT( "Simulation run data:"
                               "\n- simulation parameters:"   << std::setprecision(16) <<
                               "\n   + collision model:     " << setup.collisionModel <<
@@ -196,18 +195,14 @@ int main(int argc, char** argv){
 
    auto inflow = std::make_shared<Inflow>(blocks, setup, ids);
 
-   auto probe = std::make_shared<Probe>(blocks, setup, ids);
+   auto probe = std::make_shared<Probe>(blocks, setup, ids, config->getOneBlock("Probes"));
 
-   // Vector3<real_t> test(1.5, 0.5, 0.5);
-   probe->addPoint(Vector3<real_t>(1.5, 0.5, 0.5 ));
-   probe->addPoint(Vector3<real_t>(3.51, 0.5, 0.5));
-   probe->addPoint(Vector3<real_t>(100.51, 0.5, 0.5));
+   // probe->addPoint(Vector3<real_t>(1.5, 0.5, 0.5 ));
+   // probe->addPoint(Vector3<real_t>(2.5, 0.5, 0.5 ));
+   // probe->addPoint(Vector3<real_t>(3.5, 0.5, 0.5));
 
    probe->initialise();
 
-   return EXIT_SUCCESS;
-
- 
    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    ///                                      LB SWEEPS AND BOUNDARY HANDLING                                       ///
    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -227,13 +222,21 @@ int main(int argc, char** argv){
       WALBERLA_LOG_INFO_ON_ROOT(++step << ". Setting Grid after inflow to produce turbulence structure")
       initGrid(blocks, ids.flagField, setup);
    }
-   
-   // tpms.setupBoundary(blocks, ids.flagField);
+
+   if(config->getNumBlocks("RoughWall") > 0){
+      initHemisphers(blocks, ids.flagField, setup, config->getOneBlock("RoughWall"));
+   }
+
+   if(config->getNumBlocks("TPMS") > 0){
+      auto tpms = std::make_shared<TPMS>(config->getOneBlock("TPMS"), setup);
+      tpms->setupBoundary(blocks, ids.flagField);
+   }
+
    geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, ids.flagField, setup.fluidUID);
    // BoundaryCollection_T boundaryCollection(blocks, ids.flagField, ids.pdfFieldGPU, fluidFlagUID, latticeVelocity);
    BoundaryCollection_T boundaryCollection(blocks, ids.flagField, ids.pdfFieldGPU, setup.fluidUID, ids.velocityFieldInflowSliceGPU, ids.pdfField);
 
-   if( loggingParameters.getParameter< bool >("writeSurfaceMeshOfTPMS", false) ){
+   if(config->getNumBlocks("TPMS") > 0 && loggingParameters.getParameter< bool >("writeSurfaceMeshOfTPMS", false) ){
       WALBERLA_LOG_INFO_ON_ROOT(++step << ". Writing TPMS as surface file")
       auto mesh = postprocessing::flagFieldToSurfaceMesh< FlagField_T >(blocks, ids.flagField, setup.obstacleFlag, true);
       WALBERLA_EXCLUSIVE_WORLD_SECTION(0){
@@ -257,6 +260,7 @@ int main(int argc, char** argv){
    SweepTimeloop timeloop(blocks->getBlockStorage(), setup.timesteps);
 
    timeloop.addFuncBeforeTimeStep(inflow->updateGPUFunctor(), "Update inflow velocity");
+   timeloop.addFuncBeforeTimeStep(probe->updateFunctor(), "Update Probe Data");
 
    lbm_generated::BasicRecursiveTimeStepGPU< GPUPdfField_T, SweepCollection_T, BoundaryCollection_T >
       LBMMeshRefinement(blocks, ids.pdfFieldGPU, sweepCollection, boundaryCollection, nonUniformCommunication, nonUniformPackInfo);
diff --git a/apps/showcases/PorousMediaGPU/PorousMediaGPU.py b/apps/showcases/PorousMediaGPU/PorousMediaGPU.py
index 88bb8e1f7122fe5937913315f23a0a492333478c..17298b5aac7b20e3b719a1b3ab318c6cfab87b58 100644
--- a/apps/showcases/PorousMediaGPU/PorousMediaGPU.py
+++ b/apps/showcases/PorousMediaGPU/PorousMediaGPU.py
@@ -7,7 +7,7 @@ from pystencils.simp import insert_aliases, insert_constants
 from lbmpy import LBStencil, LBMConfig, LBMOptimisation
 from lbmpy.boundaries.boundaryconditions import ExtrapolationOutflow, UBB, NoSlip, FixedDensity
 from lbmpy.creationfunctions import create_lb_collision_rule
-from lbmpy.enums import Method, Stencil
+from lbmpy.enums import Method, Stencil, ForceModel
 
 from pystencils_walberla import CodeGeneration, generate_info_header
 from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator
@@ -22,6 +22,7 @@ std::map<std::string, std::string> infoMap{{{{"stencil", "{stencil}"}},
 
 omega = sp.symbols("omega")
 inlet_velocity = sp.symbols("u_x")
+force_vector = tuple([sp.Symbol("F_x"), 0.0, 0.0])
 max_threads = 256
  
 with CodeGeneration() as ctx:
@@ -52,6 +53,8 @@ with CodeGeneration() as ctx:
         stencil=stencil,
         relaxation_rate=omega,
         compressible=True,
+        force_model=ForceModel.GUO,
+        force=force_vector,
         # subgrid_scale_model=SubgridScaleModel.QR,
         fourth_order_correction=0.01 if method_enum == Method.CUMULANT and stencil.Q == 27 else False,
         field_name='pdfs',
diff --git a/apps/showcases/PorousMediaGPU/Probe.cpp b/apps/showcases/PorousMediaGPU/Probe.cpp
index 6e88e548287f5ed9b74fda4802b5bef4371bddb6..f26b120a34e61543dbffd1f61b22d0512ff1a08b 100644
--- a/apps/showcases/PorousMediaGPU/Probe.cpp
+++ b/apps/showcases/PorousMediaGPU/Probe.cpp
@@ -23,10 +23,10 @@ namespace walberla{
 
 void Probe::initialise(){
 
-    std::map<IBlock*, std::vector<Vector3<real_t>> > pointsPerBlock;
+    if(probePoints_.empty()){return;}
 
+    std::map<IBlock*, std::vector<Vector3<real_t>> > pointsPerBlock;
     const AABB domain = blocks_->getDomain();
-    const uint_t timesteps = setup_.timesteps;
  
     for(auto p: probePoints_){
         if(!domain.contains(p)){continue;}
@@ -58,9 +58,122 @@ void Probe::initialise(){
             ++i;
         }
 
-        cpuDataVector.resize(4 * n * timesteps);
+        cpuDataVector.resize(4 * n * writeCounter_);
         dataVector->syncGPU();
+
+        const Vector3<uint_t> s(n, uint_c(1), uint_c(1));
+        Vector3<uint32_t> threadPerBlock;
+        Vector3<uint32_t> numberBlocks;
+
+        getBlockAndGridSize(s, threadPerBlock, numberBlocks);
+
+        gpuBlock_[&block] = threadPerBlock;
+        gpuGrid_[&block] = numberBlocks;
+    }
+
+    writeHeaderFiles();
+}
+
+void Probe::writeHeaderFiles(){
+
+    filename_ += std::to_string(mpi::MPIManager::instance()->rank()) + ".txt";
+
+    filesystem::path dataFile( filename_.c_str() );
+    if( filesystem::exists( dataFile ) )
+        std::remove( dataFile.string().c_str() );
+
+    std::ofstream outfile( filename_.c_str() );
+    outfile << std::setprecision(2);
+    uint_t counter = 0;
+
+    for (auto blockIterator = blocks_->begin(); blockIterator != blocks_->end(); ++blockIterator){
+        auto block = blockIterator.get();
+        auto* dataVector = block->getData< DataVector > ( probeDataID );
+        auto & cpuCellVector = dataVector->cpuCellVector();
+        const uint_t n = uint_c(dataVector->size());
+        for(uint_t i = 0; i < n; ++i){
+            const Cell localCell(cell_idx_c(cpuCellVector[i]), cell_idx_c(cpuCellVector[i + n]), cell_idx_c(cpuCellVector[i + 2*n]));
+            Vector3< real_t > p;
+            blocks_->getBlockLocalCellCenter(*block, localCell, p);
+            outfile << "p" << counter++ << "(" << p[0] << ", " << p[1] << ", " << p[2] << ")";
+            if( !( std::next(blockIterator) == blocks_->end() &&  i == n - 1) ){outfile << ", ";}
+        }
+    }
+
+    outfile << "\n";
+    outfile << "t,";
+
+    counter = 0;
+    for (auto blockIterator = blocks_->begin(); blockIterator != blocks_->end(); ++blockIterator){
+        auto block = blockIterator.get();
+        auto* dataVector = block->getData< DataVector > ( probeDataID );
+        auto & cpuCellVector = dataVector->cpuCellVector();
+        const uint_t n = uint_c(dataVector->size());
+        for(uint_t i = 0; i < n; ++i){
+            outfile << "rho" << counter << "," << "ux" << counter << "," << "uy" << counter << "," << "uz" << counter;
+            counter++;
+            if( !( std::next(blockIterator) == blocks_->end() &&  i == n - 1) ){outfile << ",";}
+        }
+    }
+    outfile << "\n";
+    outfile.close();
+
+}
+
+void Probe::writeProbes(){
+    std::ofstream outfile( filename_.c_str(), std::ios_base::app );
+    outfile << std::setprecision(8);
+
+    const real_t t = (real_c(totalExecutionCounter_ + frequency_ - uint_c(1)) * setup_.dt) - ( real_c(executionCounter_ * frequency_) * setup_.dt);
+    for(uint_t i = 0; i < executionCounter_; ++i){
+        outfile << t + real_c(i)*setup_.dt << ",";
+        for (auto blockIterator = blocks_->begin(); blockIterator != blocks_->end(); ++blockIterator){
+            auto block = blockIterator.get();
+            auto* dataVector = block->getData< DataVector > ( probeDataID );
+            if(i == 0){
+                dataVector->syncCPU();
+                dataVector->clear();
+            }
+            auto & cpuDataVector = dataVector->cpuDataVector();
+            const uint_t n = uint_c(dataVector->size());
+
+            for(uint_t j = 0; j < n; ++j){
+                for (uint_t k = 0; k < 4; ++k){
+                    outfile << cpuDataVector[i*4*n + n*k + j];
+                    if( !(j == n - 1 && k == 3) ){outfile << ",";}
+                }
+            }
+            outfile << "\n";
+        }
+    }
+    outfile.close();
+    executionCounter_ = 0;
+}
+
+void Probe::update(){
+    ++totalExecutionCounter_;
+    if (probePoints_.empty() || (totalExecutionCounter_ - uint_c(1)) % frequency_ != 0) return;
+
+    const real_t velocityScalingFactor = setup_.conversionFactor;
+
+    for (auto& block : *blocks_){
+
+        auto pdfs = block.getData< gpu::GPUField<double> >(ids_.pdfFieldGPU);
+
+        auto* dataVector = block.getData< DataVector > ( probeDataID );
+        int64_t* gpuCellVector = dataVector->gpuCellVector();
+        real_t* gpuDataVector = dataVector->advanceGPUDataVector();
+
+        const int64_t n = dataVector->size();
+
+        updateProbe(pdfs, gpuCellVector, gpuDataVector, n, velocityScalingFactor, gpuBlock_[&block], gpuGrid_[&block]);
+    }
+
+    ++executionCounter_;
+    if(executionCounter_ >= writeCounter_ || totalExecutionCounter_ == setup_.timesteps){
+        writeProbes();
     }
+    
 }
 
 
diff --git a/apps/showcases/PorousMediaGPU/Probe.h b/apps/showcases/PorousMediaGPU/Probe.h
index 4f6017aaa913781bf38c19a9719544571b99d2df..dabb2798c3846609fbcc85126b1cd234dc99c0fb 100644
--- a/apps/showcases/PorousMediaGPU/Probe.h
+++ b/apps/showcases/PorousMediaGPU/Probe.h
@@ -23,6 +23,7 @@
 
 #include "core/logging/Initialization.h"
 #include "core/math/Constants.h"
+#include "core/Filesystem.h"
 
 #include "gpu/GPUWrapper.h"
 #include "gpu/ErrorChecking.h"
@@ -54,18 +55,32 @@ class Probe
         int64_t* gpuCellVector() {return gpuCellVector_;}
         real_t* gpuDataVector() {return gpuDataVector_;}
 
-        void syncGPU()
-        {
+        real_t* advanceGPUDataVector() {
+          real_t* result = cur_;
+          cur_ += int64_c(4) * size();
+          return result;
+        }
+
+        inline void clear() { cur_ = begin_; }
+
+        int64_t size() const {return int64_c(cpuCellVector_.size() / 3);}
+
+        void syncGPU(){
           WALBERLA_GPU_CHECK(gpuMalloc((void **)&gpuCellVector_, sizeof(int64_t) * cpuCellVector_.size() ));
           WALBERLA_GPU_CHECK(gpuMemcpy( gpuCellVector_, &cpuCellVector_[0], sizeof(int64_t) * cpuCellVector_.size(), gpuMemcpyHostToDevice ));
 
           WALBERLA_GPU_CHECK(gpuMalloc((void **)&gpuDataVector_, sizeof(real_t) * cpuDataVector_.size() ));
           WALBERLA_GPU_CHECK(gpuMemcpy( gpuDataVector_, &cpuDataVector_[0], sizeof(real_t) * cpuDataVector_.size(), gpuMemcpyHostToDevice ));
 
+          begin_ = gpuDataVector_;
           cur_ = gpuDataVector_;
 
           synced = true;
         }
+
+        void syncCPU(){
+          WALBERLA_GPU_CHECK(gpuMemcpy(&cpuDataVector_[0], gpuDataVector_, sizeof(real_t) * cpuDataVector_.size(), gpuMemcpyDeviceToHost ));
+        }
         
     private:
         std::vector<int64_t> cpuCellVector_;
@@ -74,6 +89,7 @@ class Probe
         int64_t *gpuCellVector_;
         real_t *gpuDataVector_;
 
+        real_t *begin_ = nullptr;
         real_t *cur_ = nullptr;
 
         bool synced{false};
@@ -81,13 +97,16 @@ class Probe
 
 
 
-  Probe(const std::shared_ptr< StructuredBlockForest >& blocks, const Setup& setup, const IDs& ids)
-      : blocks_(blocks), setup_(setup), ids_(ids), frequency_(uint_c(1))
+  Probe(const std::shared_ptr< StructuredBlockForest >& blocks, const Setup& setup, const IDs& ids, const Config::BlockHandle& probeParameters)
+      : blocks_(blocks), setup_(setup), ids_(ids), executionCounter_(uint_c(0)), totalExecutionCounter_(uint_c(0))
     {
-        auto createProbeData = []( IBlock * const , StructuredBlockStorage * const ) { return new DataVector(); };
-        probeDataID = blocks_->addStructuredBlockData< DataVector >( createProbeData, "Probe Data");
 
+        frequency_ = probeParameters.getParameter< uint_t >("evaluationFrequency");
+        filename_ = probeParameters.getParameter< std::string >("filename");
+        writeCounter_ = probeParameters.getParameter< uint_t >("writeCounter");
 
+        auto createProbeData = []( IBlock * const , StructuredBlockStorage * const ) { return new DataVector(); };
+        probeDataID = blocks_->addStructuredBlockData< DataVector >( createProbeData, "Probe Data");
     }
  
   void addPoint(const Vector3<real_t>& point) {
@@ -95,8 +114,10 @@ class Probe
 
   }
 
-  void initialise();
+  std::function<void()>  updateFunctor() {return [this](){ Probe::update();};}
 
+  void initialise();
+  void update();
 
   private:
   const std::shared_ptr< StructuredBlockForest > blocks_;
@@ -105,11 +126,18 @@ class Probe
   BlockDataID probeDataID;
 
   uint_t frequency_;
+  std::string filename_;
+  uint_t writeCounter_;
+  uint_t executionCounter_;
+  uint_t totalExecutionCounter_;
 
   std::vector<Vector3<real_t>> probePoints_;
 
-  uint_t executionCounter_;
+  std::map<IBlock*, Vector3<uint32_t>> gpuBlock_;
+  std::map<IBlock*, Vector3<uint32_t>> gpuGrid_;
 
+  void writeHeaderFiles();
+  void writeProbes();
 
 }; // class Probe
 } // namespace walberla
diff --git a/apps/showcases/PorousMediaGPU/Setup.h b/apps/showcases/PorousMediaGPU/Setup.h
index 5d5b9f6ec6bcd84e1c1141f217fb1ae4ba8e180f..10a1029415458161af06e885f5864858f93e204d 100644
--- a/apps/showcases/PorousMediaGPU/Setup.h
+++ b/apps/showcases/PorousMediaGPU/Setup.h
@@ -38,7 +38,6 @@ struct Setup
       auto parameters        = config->getOneBlock("Parameters");
       auto domainParameters  = config->getOneBlock("DomainSetup");
       auto logging           = config->getOneBlock("Logging");
-      auto tpms              = config->getOneBlock("TPMS");
       auto turbulence        = config->getOneBlock("Turbulence");
 
 
@@ -66,13 +65,6 @@ struct Setup
       numberOfFourrierModes = turbulence.getParameter< uint_t >("numberOfFourrierModes");
       uc                    = turbulence.getParameter< Vector3< real_t > >("U_c");
 
-      waveNumber = tpms.getParameter< real_t >("waveNumber");
-      scaling    = tpms.getParameter< real_t >("scaling");
-      strut      = tpms.getParameter< real_t >("strut");
-      tpmsBeginn = tpms.getParameter< real_t >("start");
-      tpmsEnd    = tpms.getParameter< real_t >("end");
-      tpmsType   = tpms.getParameter< std::string >("type");
-
       const real_t latticeReferenceLength = referenceLength / coarseMeshSize;
       const real_t speedOfSound = real_c(real_c(1.0) / std::sqrt( real_c(3.0) ));
       machNumber = latticeVelocity / speedOfSound;
@@ -131,13 +123,6 @@ struct Setup
    uint_t numberOfFourrierModes;
    Vector3<real_t> uc;
 
-   real_t waveNumber;
-   real_t scaling;
-   real_t strut;
-   real_t tpmsBeginn;
-   real_t tpmsEnd;
-   std::string tpmsType;
-
    real_t machNumber;
    real_t reynoldsNumber;
 
diff --git a/apps/showcases/PorousMediaGPU/TPMS.cpp b/apps/showcases/PorousMediaGPU/TPMS.cpp
index 82e7ddc3064d8404bd6dc697556487b66a0fb9b7..f475c77dd875345e517e4029958fa2e5a66449bc 100644
--- a/apps/showcases/PorousMediaGPU/TPMS.cpp
+++ b/apps/showcases/PorousMediaGPU/TPMS.cpp
@@ -24,11 +24,11 @@ namespace walberla
 {
 
 real_t TPMS::getDistance(const real_t x, const real_t y, const real_t z, const real_t scaling) const {
-   const real_t kx = setup_.waveNumber * scaling;
-   const real_t ky = setup_.waveNumber * scaling;
-   const real_t kz = setup_.waveNumber * scaling;
+   const real_t kx = waveNumber_ * scaling;
+   const real_t ky = waveNumber_ * scaling;
+   const real_t kz = waveNumber_ * scaling;
 
-   if(tpmsType == 0){
+   if(tpmsType_ == GYROID){
       return cos(kx * x) * sin(ky * y) + cos(ky * y) * sin(kz * z) + cos(kz * z) * sin(kx * x);
    }
    else{
@@ -39,8 +39,8 @@ real_t TPMS::getDistance(const real_t x, const real_t y, const real_t z, const r
 bool TPMS::contains(const Vector3< real_t >& point) const
 {
    const real_t dist1 = getDistance(point[0], point[1], point[2], real_c(1.0));
-   const real_t dist2 = getDistance(point[0], point[1], point[2], setup_.scaling);
-   return (dist1 * dist2) >= setup_.strut;
+   const real_t dist2 = getDistance(point[0], point[1], point[2], scaling_);
+   return (dist1 * dist2) >= strut_;
 }
 
 bool TPMS::contains(const AABB& aabb) const
@@ -59,8 +59,8 @@ bool TPMS::contains(const AABB& aabb) const
 }
 
 void TPMS::setupBoundary(const std::shared_ptr< StructuredBlockForest >& sbfs, const BlockDataID flagFieldID){
-   const real_t start = setup_.tpmsBeginn / setup_.dxC;
-   const real_t end   = setup_.tpmsEnd / setup_.dxC;
+   const real_t start = tpmsBeginn_ / setup_.dxC;
+   const real_t end   = tpmsEnd_ / setup_.dxC;
    for (auto& block : *sbfs){
       const auto flagField = block.template getData< FlagField_T >(flagFieldID);
       const auto boundaryFlag = flagField->getOrRegisterFlag(setup_.obstacleUID);
diff --git a/apps/showcases/PorousMediaGPU/TPMS.h b/apps/showcases/PorousMediaGPU/TPMS.h
index dd7ac324128f06c5271410aeecd8992b17c950a6..e72a72a7e2e2865a7435dc62d6d68bac0ba4f177 100644
--- a/apps/showcases/PorousMediaGPU/TPMS.h
+++ b/apps/showcases/PorousMediaGPU/TPMS.h
@@ -31,6 +31,7 @@
 #include "core/math/AABB.h"
 #include "core/math/Vector3.h"
 #include "core/cell/Cell.h"
+#include "core/config/Config.h"
 
 #include "stencil/D3Q7.h"
 #include "stencil/D3Q27.h"
@@ -45,13 +46,25 @@ class TPMS
 {
  public:
 
-   TPMS(const Setup& setup) : setup_(setup){
-      if (setup.tpmsType.compare("gyroid") == 0){
-         tpmsType = 0;
+   enum TPMSType {GYROID = 0};
+
+   TPMS(const Config::BlockHandle& tpmsParameters, const Setup& setup) : setup_(setup){
+
+      waveNumber_ = tpmsParameters.getParameter< real_t >("waveNumber");
+      scaling_    = tpmsParameters.getParameter< real_t >("scaling");
+      strut_      = tpmsParameters.getParameter< real_t >("strut");
+      tpmsBeginn_ = tpmsParameters.getParameter< real_t >("start");
+      tpmsEnd_    = tpmsParameters.getParameter< real_t >("end");
+
+      const std::string tpmsString = tpmsParameters.getParameter< std::string >("type");
+
+      if (tpmsString.compare("gyroid") == 0){
+         tpmsType_ = GYROID;
       }
       else{
-         WALBERLA_ABORT("Unsoprted TPMS Type: " << setup.tpmsType);
+         WALBERLA_ABORT("Unsoprted TPMS Type: " << tpmsString);
       }
+         
    }
 
    real_t getDistance(const real_t x, const real_t y, const real_t z, const real_t scaling) const;
@@ -64,16 +77,24 @@ class TPMS
    void setupBoundary(const std::shared_ptr< StructuredBlockForest >& sbfs, const BlockDataID flagFieldID);
 
  private:
-   uint_t tpmsType;
+   real_t waveNumber_;
+   real_t scaling_;
+   real_t strut_;
+   real_t tpmsBeginn_;
+   real_t tpmsEnd_;
+   TPMSType tpmsType_;
    Setup setup_;
 }; // class TPMS
 
+/*
 class TPMSRefinementSelection
 {
  public:
-   TPMSRefinementSelection(const Setup& setup){
-      const real_t start = setup.tpmsBeginn;
-      const real_t end   = setup.tpmsEnd;
+   TPMSRefinementSelection(const TPMS& tpms){
+      const auto setup = tpms.getSetup();
+
+      const real_t start = tpms.tpmsBeginn;
+      const real_t end   = tpms.tpmsEnd;
       boundingBox_ = AABB(start, real_c(0.0), real_c(0.0),
                           end  , setup.domainSize[1], setup.domainSize[2]);
 
@@ -97,6 +118,6 @@ class TPMSRefinementSelection
    AABB boundingBox_;
 
 }; // class TPMSRefinementSelection
-
+*/
 
 } // namespace walberla
\ No newline at end of file
diff --git a/apps/showcases/PorousMediaGPU/PorousMediaGPU.prm b/apps/showcases/PorousMediaGPU/TPMS.prm
similarity index 89%
rename from apps/showcases/PorousMediaGPU/PorousMediaGPU.prm
rename to apps/showcases/PorousMediaGPU/TPMS.prm
index 5757f1d68a791c27116774368a630434f8cb8cf4..c3933e029060bb55056bb7d6a99b163c8c30c24d 100644
--- a/apps/showcases/PorousMediaGPU/PorousMediaGPU.prm
+++ b/apps/showcases/PorousMediaGPU/TPMS.prm
@@ -7,7 +7,7 @@ Parameters
     latticeVelocity 0.05;
     initialiseWithInletVelocity true;
 
-    timesteps 10001;
+    timesteps 1;
 
     processMemoryLimit 512; // MiB
     innerOuterSplit <1, 1, 1>;
@@ -66,13 +66,13 @@ StabilityChecker
 
 VTKWriter
 {
-    vtkWriteFrequency 10000;
+    vtkWriteFrequency 1;
     vtkGhostLayers 0;
     velocity true;
     writeVelocityAsMagnitude true;
     density false;
     flag false;
-    writeOnlySlice true;
+    writeOnlySlice false;
     sliceThickness 1;
     writeXZSlice false;
     amrFileFormat false;
@@ -90,10 +90,9 @@ Logging
     writeSurfaceMeshOfTPMS false;
 }
 
-Evaluation
+Probes
 {
-    evaluationCheckFrequency 500;
-    rampUpTime 0;
-    logToStream true;
-    logToFile true;
+    evaluationFrequency 1;
+    writeCounter 1000; // after how many evaluations the results are written to file
+    filename probes;
 }
\ No newline at end of file
diff --git a/apps/showcases/PorousMediaGPU/roughWall.prm b/apps/showcases/PorousMediaGPU/roughWall.prm
new file mode 100644
index 0000000000000000000000000000000000000000..d09a06a39e1f4c012827bb61cb64ff982e63f8d9
--- /dev/null
+++ b/apps/showcases/PorousMediaGPU/roughWall.prm
@@ -0,0 +1,93 @@
+Parameters
+{
+    reynoldsNumber 2500;
+
+    referenceVelocity 1.0;
+    referenceLength 1.0;
+    latticeVelocity 0.05;
+    initialiseWithInletVelocity true;
+
+    timesteps 1;
+
+    processMemoryLimit 512; // MiB
+    innerOuterSplit <1, 1, 1>;
+
+    // GPU related Parameters, only used if GPU is enabled
+    gpuEnabledMPI false;
+}
+
+Turbulence
+{
+    useGrid false;
+    U_rms  0.01;
+    characteristicLength 0.4;
+    numberOfFourrierModes 128;
+    U_c < 2.0, 0.0, 0.0 >;
+}
+
+RoughWall
+{
+    hemispheresRadius 1;
+    hemispheresDistance 4;
+}
+
+DomainSetup
+{
+    domainSize    < 60, 40, 12 >;
+    cellsPerBlock < 400, 80, 80 >;
+    // cellsPerBlock < 768, 96, 96 >;
+    // cellsPerBlock < 1536, 192, 192 >;
+    blocks    < 1, 1, 1 >;
+    periodic    < true, false, false >;
+    refinementLevels 0;
+    numberProcesses 1; // This is for load balancing, overwritten if more than one proc is used
+    blockForestFilestem porousMediaBlockForest;
+}
+//! [domainSetup]
+
+Boundaries
+{
+    Border { direction N;    walldistance -1;  flag NoSlip; }
+    Border { direction S;    walldistance -1;  flag NoSlip; }
+}
+
+
+StabilityChecker
+{
+    checkFrequency 0;
+    streamOutput   false;
+    vtkOutput      true;
+}
+
+VTKWriter
+{
+    vtkWriteFrequency 1;
+    vtkGhostLayers 0;
+    velocity true;
+    writeVelocityAsMagnitude true;
+    density false;
+    flag false;
+    writeOnlySlice false;
+    sliceThickness 1;
+    writeXZSlice false;
+    amrFileFormat false;
+    oneFilePerProcess false;
+    samplingResolution -1;
+    initialWriteCallsToSkip 0;
+}
+
+Logging
+{
+    logLevel info;  // info progress detail tracing
+    writeSetupForestAndReturn false; // When only one process is used the decomposition is writen and the program terminates
+    readSetupFromFile false;
+    remainingTimeLoggerFrequency 20; // in seconds
+    writeSurfaceMeshOfTPMS false;
+}
+
+Probes
+{
+    evaluationFrequency 1;
+    writeCounter 1000; // after how many evaluations the results are written to file
+    filename probes;
+}
\ No newline at end of file
diff --git a/apps/showcases/PorousMediaGPU/utilGPU.cu b/apps/showcases/PorousMediaGPU/utilGPU.cu
index 87ae21283fc2be0c164349019d335933dc736ded..8efd2fe45e686d91f18f0e676570e1f885887442 100644
--- a/apps/showcases/PorousMediaGPU/utilGPU.cu
+++ b/apps/showcases/PorousMediaGPU/utilGPU.cu
@@ -68,8 +68,8 @@ static __device__ void densityVelocityOdd(double* const _data_pdfs,
    *uY = u_1;
    *uZ = u_2;
 }
-
-__device__ void warpReduce(volatile double *uX, volatile double *uY, volatile double *uZ, unsigned int tid) 
+/*
+static __device__ void warpReduce(volatile double *uX, volatile double *uY, volatile double *uZ, unsigned int tid) 
 {
    uX[tid] += uX[tid+32]; uY[tid] += uY[tid+32]; uZ[tid] += uZ[tid+32];
    uX[tid] += uX[tid+16]; uY[tid] += uY[tid+16]; uZ[tid] += uZ[tid+16];
@@ -78,7 +78,7 @@ __device__ void warpReduce(volatile double *uX, volatile double *uY, volatile do
    uX[tid] += uX[tid+ 2]; uY[tid] += uY[tid+ 2]; uZ[tid] += uZ[tid+ 2];
    uX[tid] += uX[tid+ 1]; uY[tid] += uY[tid+ 1]; uZ[tid] += uZ[tid+ 1];
 }
-
+*/
 static __device__ void singleMode(double* rX, double* rY, double* rZ, const double kappaN, const double ut,
                                   const double pX, const double pY, const double pZ, const int64_t linear_index)
 {
@@ -215,6 +215,37 @@ static __global__ void summOfFourrierModesKernel(double * velocity, double * con
       }
    }
 }
+
+
+static __global__ void evaluateProbePoints(double* const pdfs, int64_t* cellData, double* probeData,
+                                           const int64_t n, const uint8_t timestep, const real_t velocityScalingFactor,
+                                           const int64_t xStride, const int64_t yStride, const int64_t zStride, const int64_t fStride)
+{
+   if(blockDim.x*blockIdx.x + threadIdx.x < n)
+   {
+
+      const int64_t ctr = blockDim.x*blockIdx.x + threadIdx.x;
+
+      const int64_t x = cellData[ctr];
+      const int64_t y = cellData[ctr + n];
+      const int64_t z = cellData[ctr + 2*n];
+
+      double rho, uX, uY, uZ; 
+
+      if(((timestep & 1) ^ 1)) {
+         densityVelocityEven(pdfs, &rho, &uX, &uY, &uZ, x, y, z, xStride, yStride, zStride, fStride);
+      }
+      else{
+         densityVelocityOdd(pdfs, &rho, &uX, &uY, &uZ, x, y, z, xStride, yStride, zStride, fStride);
+      }
+
+      probeData[ctr] = rho;
+      probeData[ctr + n] = velocityScalingFactor * uX;
+      probeData[ctr + 2*n] = velocityScalingFactor * uY;
+      probeData[ctr + 3*n] = velocityScalingFactor * uZ;
+   }
+}
+
 }
 
 
@@ -260,5 +291,23 @@ void updateVelocityPlane(gpu::GPUField<real_t>* velocityGPU, real_t* kappa, real
 
 }
 
+void updateProbe(gpu::GPUField<real_t>* pdfs, int64_t* cellData, real_t* probeData, const int64_t n, const real_t velocityScalingFactor,
+                 const Vector3<uint32_t>& block, const Vector3<uint32_t>& grid){
+
+   dim3 block_(block[0], block[1], block[2]);
+   dim3 grid_(grid[0], grid[1], grid[2]);
+
+   double * data_pdfs = pdfs->dataAt(0, 0, 0, 0);
+   const uint8_t timestep = pdfs->getTimestep();
+  
+   const int64_t xStride = int64_c(pdfs->xStride());
+   const int64_t yStride = int64_c(pdfs->yStride());
+   const int64_t zStride = int64_c(pdfs->zStride());
+   const int64_t fStride = int64_c(pdfs->fStride());
+   internal::evaluateProbePoints<<<grid_, block_, 0, nullptr>>>(data_pdfs, cellData, probeData, n, timestep, velocityScalingFactor,
+                                                                xStride, yStride, zStride, fStride);
+
+}
+
 
 } // namespace walberla
\ No newline at end of file
diff --git a/apps/showcases/PorousMediaGPU/utilGPU.h b/apps/showcases/PorousMediaGPU/utilGPU.h
index 2e48ae4499edec7caee84ffe94fc66af28c1429b..d7c41492246b4b8bca4b27b22acc22583334a819 100644
--- a/apps/showcases/PorousMediaGPU/utilGPU.h
+++ b/apps/showcases/PorousMediaGPU/utilGPU.h
@@ -34,4 +34,7 @@ void updateVelocityPlane(gpu::GPUField<real_t>* velocityGPU, real_t* kappa, real
                          const real_t dx, const real_t t, const uint_t n, const real_t latticeVelocity,
                          const Vector3<uint32_t>& block, const Vector3<uint32_t>& grid);
 
+void updateProbe(gpu::GPUField<real_t>* pdfs, int64_t* cellData, real_t* probeData, const int64_t n, const real_t velocityScalingFactor,
+                 const Vector3<uint32_t>& block, const Vector3<uint32_t>& grid);
+
 } // namespace walberla
\ No newline at end of file