diff --git a/apps/showcases/Antidunes/Antidunes.cpp b/apps/showcases/Antidunes/Antidunes.cpp
index 48113869c8672be163c2c7c6e377d4b5b45f17a3..99b736a2508376d633e4177429437c3071a2b648 100644
--- a/apps/showcases/Antidunes/Antidunes.cpp
+++ b/apps/showcases/Antidunes/Antidunes.cpp
@@ -1235,7 +1235,7 @@ int main(int argc, char** argv)
    }
 
    if (vtkSpacingParticles != uint_t(0))
-   {
+   {bvc
       // sphere
       auto particleVtkOutput = make_shared< mesa_pd::vtk::ParticleVtkOutput >(particleStorage);
       particleVtkOutput->addOutput< mesa_pd::data::SelectParticleUid >("uid");
diff --git a/apps/showcases/PorousMediaGPU/DragEvaluation.h b/apps/showcases/PorousMediaGPU/DragEvaluation.h
index a344180b4a6f1f9ba5b602838984c31beff81918..9b57b4958969d43fd4416e5bd8b431d155699f51 100644
--- a/apps/showcases/PorousMediaGPU/DragEvaluation.h
+++ b/apps/showcases/PorousMediaGPU/DragEvaluation.h
@@ -86,7 +86,7 @@ class DragEvaluation
    uint_t writeCounter_;
    uint_t executionCounter_{0};
    uint_t totalExecutionCounter_{0};
-   uint_t rampUpTime_{100000};
+   uint_t rampUpTime_{0};
 
    uint_t meanQuantitiesCounter_{0};
 
diff --git a/apps/showcases/PorousMediaGPU/GridGeneration.h b/apps/showcases/PorousMediaGPU/GridGeneration.h
index 0dbafcbd201b23b0e18d62496fa0f0eccc09de18..139a2f29e8d19da3c868bb9c551f06eb0355314a 100644
--- a/apps/showcases/PorousMediaGPU/GridGeneration.h
+++ b/apps/showcases/PorousMediaGPU/GridGeneration.h
@@ -129,6 +129,13 @@ void createSetupBlockForest(SetupBlockForest& setupBfs, const Setup& setup, cons
 }
 
 void createBlockForest(shared_ptr< BlockForest >& bfs, const Setup& setup, const RefinementSelectionFunction& refinementSelection=defaultRefinement){
+
+   if (setup.loadSnapshot && !setup.recreateBlockForest){
+      const std::string BlockForestFile = setup.snapshotBaseFolder + "/" + setup.blockForestFilestem + ".file";
+      bfs = make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), BlockForestFile.c_str(), true, false);
+      return;
+   }
+
    if (mpi::MPIManager::instance()->numProcesses() > 1){
       // Load structured block forest from file
       std::ostringstream oss;
@@ -143,7 +150,7 @@ void createBlockForest(shared_ptr< BlockForest >& bfs, const Setup& setup, const
       }
       else{
          bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()),
-                                               setupBlockForestFilepath.c_str(), false);
+                                               setupBlockForestFilepath.c_str(), true, false);
       }
    }
    else{
@@ -151,4 +158,13 @@ void createBlockForest(shared_ptr< BlockForest >& bfs, const Setup& setup, const
       createSetupBlockForest(setupBfs, setup, refinementSelection);
       bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs);
    }
+
+   if (setup.storeSnapshot){
+      WALBERLA_CHECK_NOT_NULLPTR(bfs)
+
+      const std::string BlockForestFile = setup.snapshotBaseFolder + "/" + setup.blockForestFilestem + ".file";
+      bfs->saveToFile(BlockForestFile.c_str());
+   }
+
+
 }
\ No newline at end of file
diff --git a/apps/showcases/PorousMediaGPU/InitializerFunctions.cpp b/apps/showcases/PorousMediaGPU/InitializerFunctions.cpp
index 6c45517a989e38d5c6e8b79c9269dc041f65e3ee..7cd4f201d0181b540d883c615fe9e8ab5324d2d7 100644
--- a/apps/showcases/PorousMediaGPU/InitializerFunctions.cpp
+++ b/apps/showcases/PorousMediaGPU/InitializerFunctions.cpp
@@ -99,7 +99,6 @@ void initGenericPorousMatrix(const shared_ptr< StructuredBlockStorage >& blocks,
 
    const real_t poreSize     = gpmParameters.getParameter< real_t >("poreSize");
    const real_t barSize      = gpmParameters.getParameter< real_t >("barSize");
-   const real_t halfPoreSize = poreSize * real_c(0.5);
    const real_t PoreSize2    = poreSize * real_c(2.0);
    const real_t halfbarSize  = barSize * real_c(0.5);
 
@@ -107,6 +106,21 @@ void initGenericPorousMatrix(const shared_ptr< StructuredBlockStorage >& blocks,
    const real_t zMin = domain.zMin();
    const real_t zMax = domain.zMax();
 
+   std::vector<AABB> bars;
+
+   const uint_t xn = uint_c(std::ceil(domain.xMax() / poreSize));
+   const uint_t yn = uint_c(std::ceil(domain.xMax() / PoreSize2));
+
+   for(uint_t i = 0; i <= xn; ++i){
+      for(uint_t j = 0; j <= yn; ++j){
+         const real_t xc = real_c(i) * poreSize + barSize;
+         const real_t yc = i % 2 != 0 ? real_c(j) * PoreSize2 + barSize : real_c(j) * PoreSize2 + barSize + poreSize;
+
+         const AABB barAABB(xc - halfbarSize, yc - halfbarSize, zMin, xc + halfbarSize, yc + halfbarSize, zMax);
+         bars.emplace_back(barAABB);
+      }
+   }
+
    for (auto bIt = blocks->begin(); bIt != blocks->end(); ++bIt){
       Block& b             = dynamic_cast< Block& >(*bIt);
       const auto flagField    = b.getData< FlagField_T >(flagFieldID);
@@ -116,14 +130,25 @@ void initGenericPorousMatrix(const shared_ptr< StructuredBlockStorage >& blocks,
          Vector3< real_t > cellCenter = blocks->getBlockLocalCellCenter( b, it.cell() );
          blocks->mapToPeriodicDomain(cellCenter);
 
-         const real_t xc = std::round(cellCenter[0] / poreSize) * poreSize;
-         const real_t ys = real_c((uint_c(std::round(cellCenter[0] / poreSize)) % 2)) * poreSize;
-         const real_t yc = (std::round((cellCenter[1] - ys) / PoreSize2) * (PoreSize2)) + ys;
-
-         const AABB barAABB(xc - halfbarSize, yc - halfbarSize, zMin, xc + halfbarSize, yc + halfbarSize, zMax);
-         if(barAABB.contains(cellCenter)) {
-            addFlag(flagField->get(it.x(), it.y(), it.z()), boundaryFlag);
+         for(const auto& barAABB: bars){
+            if(barAABB.contains(cellCenter)) {
+               addFlag(flagField->get(it.x(), it.y(), it.z()), boundaryFlag);
+            }
          }
+
+         // WALBERLA_LOG_DEVEL_VAR(cellCenter[0] / poreSize)
+         // WALBERLA_LOG_DEVEL_VAR(std::round( (cellCenter[0]) / poreSize))
+
+         // const real_t xc = xMap[std::round( (cellCenter[0]) / (poreSize + barSize))];
+
+         // const real_t xc = std::round( (cellCenter[0]) / poreSize) * poreSize;
+         //const real_t ys = real_c((uint_c(std::round(cellCenter[0] / poreSize)) % 2)) * poreSize;
+         //const real_t yc = (std::round((cellCenter[1] - ys) / PoreSize2) * (PoreSize2)) + ys;
+
+         //const AABB barAABB(xc - halfbarSize, yc - halfbarSize, zMin, xc + halfbarSize, yc + halfbarSize, zMax);
+         //if(barAABB.contains(cellCenter)) {
+         //   addFlag(flagField->get(it.x(), it.y(), it.z()), boundaryFlag);
+        // }
          
       }
    }
@@ -199,4 +224,5 @@ void setVelocityFieldsAsmuth(const shared_ptr< StructuredBlockStorage >& blocks,
    }
 } // function setVelocityFieldsHenrik
 
+
 } // namespace walberla
diff --git a/apps/showcases/PorousMediaGPU/JinCase.prm b/apps/showcases/PorousMediaGPU/JinCase.prm
index 6ace3c8547924824ea051ec89371b84a66d9ebad..dd57dc225b5248d1844a9428502f8cb276642f2e 100644
--- a/apps/showcases/PorousMediaGPU/JinCase.prm
+++ b/apps/showcases/PorousMediaGPU/JinCase.prm
@@ -1,21 +1,40 @@
 Parameters
 {
-    reynoldsNumber 700;
-    latticeVelocity 0.05;
+    reynoldsNumber 500;
+    latticeVelocity 0.035;
     initialiseWithlatticeVelocity true;
     initialiseWithTurbulence false;
-    forceUpdateFrequency 0;
-    pressureGradient 0.188;
+    pressureGradient 0.190;
 
-    timesteps 100001;
+    timesteps 5000001;
 
     processMemoryLimit 512; // MiB
     innerOuterSplit <1, 1, 1>;
 
     // GPU related Parameters, only used if GPU is enabled
-    gpuEnabledMPI false;
+    gpuEnabledMPI True;
 }
 
+Snapshot
+{
+    loadSnapshot        false;
+    storeSnapshot       true;
+    recreateBlockForest true;
+    snapshotFrequency   10;
+    snapshotBaseFolder  snapshots;
+}
+
+ForcingParameters
+{
+    forceUpdateFrequency 1000;
+    adaptDrivingForce    false;
+    proportionalGain     2e-4;
+    derivativeGain       1e-6;
+    integralGain         2e-4;
+    maxRamp              1e-4;
+}
+
+
 Turbulence
 {
     useGrid false;
@@ -35,8 +54,9 @@ genericPorousMatrix
 DomainSetup
 {
     domainSize    < 12, 8, 4 >;
-    cellsPerBlock < 960, 640, 320 >;
-    blocks    < 1, 1, 1 >;
+    cellsPerBlock < 360, 240, 120 >;
+    // cellsPerBlock < 360, 240, 120 >;
+    blocks    < 2, 2, 2 >;
 
     periodic    < true, true, true >;
     refinementLevels 0;
@@ -59,12 +79,13 @@ StabilityChecker
 
 VTKWriter
 {
-    vtkWriteFrequency 10000;
+    vtkWriteFrequency 100000;
     vtkGhostLayers 0;
     velocity true;
     writeVelocityAsMagnitude false;
     density false;
     meanVelocity true;
+    reynoldsstress false;
     flag false;
     writeOnlySlice true;
     sliceThickness 1;
@@ -90,6 +111,7 @@ Probes
 {
     evaluationFrequency 1;
     writeCounter 1000; // after how many evaluations the results are written to file
+    rampUpTime 2000000;
 }
 
 DragEvaluation
diff --git a/apps/showcases/PorousMediaGPU/JinCaseInflow.prm b/apps/showcases/PorousMediaGPU/JinCaseInflow.prm
new file mode 100644
index 0000000000000000000000000000000000000000..1a14df767a52b9c67194f0a4e0764d31bbf3bfc6
--- /dev/null
+++ b/apps/showcases/PorousMediaGPU/JinCaseInflow.prm
@@ -0,0 +1,110 @@
+Parameters
+{
+    reynoldsNumber 500;
+    latticeVelocity 0.035;
+    initialiseWithlatticeVelocity true;
+    initialiseWithTurbulence false;
+    pressureGradient 0.190;
+
+    timesteps 100001;
+
+    processMemoryLimit 512; // MiB
+    innerOuterSplit <1, 1, 1>;
+
+    // GPU related Parameters, only used if GPU is enabled
+    gpuEnabledMPI True;
+}
+
+Snapshot
+{
+    loadSnapshot       false;
+    storeSnapshot      true;
+    snapshotFrequency  100000;
+    snapshotBaseFolder snapshots;
+}
+
+Turbulence
+{
+    useGrid false;
+    turbulentInflow true;
+    U_rms  0.035;
+    characteristicLength 0.1;
+    numberOfFourrierModes 1000;
+    U_c < 2.0, 0.0, 0.0 >;
+}
+
+genericPorousMatrix 
+{
+    poreSize 2;
+    barSize 1;
+}
+
+DomainSetup
+{
+    domainSize    < 12, 8, 4 >;
+    cellsPerBlock < 360, 240, 120 >;
+    // cellsPerBlock < 360, 240, 120 >;
+    blocks    < 2, 2, 2 >;
+
+    periodic    < false, true, true >;
+    refinementLevels 0;
+    numberProcesses 1; // This is for load balancing, overwritten if more than one proc is used
+    blockForestFilestem porousMediaBlockForest;
+}
+//! [domainSetup]
+
+Boundaries
+{
+    Border { direction W;    walldistance -1;  flag TurbulentInflow; }
+    Border { direction E;    walldistance -1;  flag Outflow; }
+}
+
+StabilityChecker
+{
+    checkFrequency 0;
+    streamOutput   false;
+    vtkOutput      true;
+}
+
+VTKWriter
+{
+    vtkWriteFrequency 100000;
+    vtkGhostLayers 0;
+    velocity true;
+    writeVelocityAsMagnitude false;
+    density false;
+    meanVelocity true;
+    reynoldsstress false;
+    flag false;
+    writeOnlySlice true;
+    sliceThickness 1;
+    writeXZSlice false;
+    amrFileFormat false;
+    oneFilePerProcess false;
+    samplingResolution -1;
+    initialWriteCallsToSkip 0;
+}
+
+Logging
+{
+    logLevel info;  // info progress detail tracing
+    writeSetupForestAndReturn false; // When only one process is used the decomposition is writen and the program terminates
+    readSetupFromFile false;
+    remainingTimeLoggerFrequency 20; // in seconds
+    writeSurfaceMeshOfTPMS false;
+    outputFolder output;
+    filestem Jin;
+}
+
+Probes
+{
+    evaluationFrequency 1;
+    writeCounter 1000; // after how many evaluations the results are written to file
+    rampUpTime 2000000;
+}
+
+DragEvaluation
+{
+    evaluationStart 0;
+    evaluationFrequency 0;
+}
\ No newline at end of file
diff --git a/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp b/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp
index 2d70f31d3837d347af45a1dd5f8c128b02eafde2..d2d09661ee46158ebf5bcc27468376232d1474f3 100644
--- a/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp
+++ b/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp
@@ -44,6 +44,7 @@
 #include "field/iterators/FieldIterator.h"
 #include "field/vtk/VTKWriter.h"
 #include "field/vtk/FlagFieldCellFilter.h"
+#include "field/blockforest/BlockDataHandling.h"
 
 #include "geometry/InitBoundaryHandling.h"
 #include "geometry/mesh/TriangleMeshIO.h"
@@ -101,9 +102,8 @@ using blockforest::communication::UniformBufferedScheme;
 using lbm_generated::UniformGeneratedPdfPackInfo;
 #endif
 
-inline Vector3< uint_t > inflowSliceSize( const shared_ptr< StructuredBlockStorage > & blocks, IBlock * const block ){
-   return Vector3<uint_t>( uint_c(1), blocks->getNumberOfYCells( *block ), blocks->getNumberOfZCells( *block ) );
-}
+using VelocityFieldDH_T = field::DefaultBlockDataHandling< VelocityField_T >;
+using TensorFieldDH_T = field::DefaultBlockDataHandling< TensorField_T >;
 
 class HemisphereWallDistance
 {
@@ -232,31 +232,36 @@ int main(int argc, char** argv){
    auto loggingParameters = config->getOneBlock("Logging");
    auto boundariesConfig  = config->getBlock("Boundaries");
 
+   const uint_t welfordSamplingStart = parameters.getParameter< uint_t >("welfordSamplingStart", 0);
+   const uint_t welfordSamplingFrequency = parameters.getParameter< uint_t >("welfordSamplingFrequency", 0);
+
    Setup setup(config, infoMap);
    WALBERLA_LOG_INFO_ON_ROOT( "Simulation run data:"
                               "\n- simulation parameters:"   << std::setprecision(16) <<
-                              "\n   + collision model:     " << setup.collisionModel <<
-                              "\n   + stencil:             " << setup.stencil <<
-                              "\n   + streaming:           " << setup.streamingPattern <<
-                              "\n   + compressible:        " << ( StorageSpecification_T::compressible ? "yes" : "no" ) <<
-                              "\n- simulation properties:  "
-                              "\n   + kin. viscosity:      " << setup.kinViscosity << " [m^2/s] (on the coarsest grid)" <<
-                              "\n   + viscosity LB:        " << setup.viscosity  << " [dx*dx/dt] (on the coarsest grid)" <<
-                              "\n   + omega:               " << setup.omega << " (on the coarsest grid)" <<
-                              "\n   + omega:               " << setup.omegaF << " (on the finest grid)" <<
-                              "\n   + inflow velocity:     " << setup.referenceVelocity << " [m/s]" <<
-                              "\n   + lattice velocity:    " << setup.latticeVelocity << " [dx/dt]" <<
-                              "\n   + friction velocity:   " << setup.frictionVelocity << " [dx/dt]" <<
-                              "\n   + Reynolds number:     " << setup.reynoldsNumber <<
-                              "\n   + Mach number:         " << setup.machNumber <<
-                              "\n   + driving force:       " << setup.drivingForce <<
-                              "\n   + dx (coarsest grid):  " << setup.dxC << " [m]" <<
-                              "\n   + dx (finest grid):    " << setup.dxF << " [m]" <<
-                              "\n   + dt (coarsest grid):  " << setup.dt << " [s]" <<
-                              "\n   + conversion Faktor:   " << setup.conversionFactor <<
-                              "\n   + #time steps:         " << setup.timesteps << " (on the coarsest grid, " << ( real_c(1.0) / setup.dt ) << " for 1s of real time)" <<
-                              "\n   + simulation time:     " << ( real_c( setup.timesteps ) * setup.dt ) << " [s]"
-                              "\n   + flow through cycles: " << ( setup.flowThroughCycles ))
+                              "\n   + collision model:       " << setup.collisionModel <<
+                              "\n   + stencil:               " << setup.stencil <<
+                              "\n   + streaming:             " << setup.streamingPattern <<
+                              "\n   + forceModel:            " << infoMap["forceModel"] <<
+                              "\n   + FourthOrderCorrection: " << infoMap["FourthOrderCorrection"] <<
+                              "\n   + compressible:          " << ( StorageSpecification_T::compressible ? "yes" : "no" ) <<
+                              "\n- simulation properties:    "
+                              "\n   + kin. viscosity:        " << setup.kinViscosity << " [m^2/s] (on the coarsest grid)" <<
+                              "\n   + viscosity LB:          " << setup.viscosity  << " [dx*dx/dt] (on the coarsest grid)" <<
+                              "\n   + omega:                 " << setup.omega << " (on the coarsest grid)" <<
+                              "\n   + omega:                 " << setup.omegaF << " (on the finest grid)" <<
+                              "\n   + inflow velocity:       " << setup.referenceVelocity << " [m/s]" <<
+                              "\n   + lattice velocity:      " << setup.latticeVelocity << " [dx/dt]" <<
+                              "\n   + friction velocity:     " << setup.frictionVelocity << " [dx/dt]" <<
+                              "\n   + Reynolds number:       " << setup.reynoldsNumber <<
+                              "\n   + Mach number:           " << setup.machNumber <<
+                              "\n   + driving force:         " << setup.drivingForce <<
+                              "\n   + dx (coarsest grid):    " << setup.dxC << " [m]" <<
+                              "\n   + dx (finest grid):      " << setup.dxF << " [m]" <<
+                              "\n   + dt (coarsest grid):    " << setup.dt << " [s]" <<
+                              "\n   + conversion Faktor:     " << setup.conversionFactor <<
+                              "\n   + #time steps:           " << setup.timesteps << " (on the coarsest grid, " << ( real_c(1.0) / setup.dt ) << " for 1s of real time)" <<
+                              "\n   + simulation time:       " << ( real_c( setup.timesteps ) * setup.dt ) << " [s]"
+                              "\n   + flow through cycles:   " << ( setup.flowThroughCycles ))
 
    bool writeSetupForestAndReturn = loggingParameters.getParameter< bool >("writeSetupForestAndReturn", false);
    if (uint_c(MPIManager::instance()->numProcesses()) > 1) writeSetupForestAndReturn = false;
@@ -276,14 +281,67 @@ int main(int argc, char** argv){
    blocks->createCellBoundingBoxes();
    auto finalDomain = blocks->getDomain();
 
+   uint_t beginTimeStep = 0;
+   uint_t welfordCtr = 0;
+   if (setup.loadSnapshot){
+      WALBERLA_ROOT_SECTION(){
+         std::ifstream file;
+         file.open(setup.checkpointConfigFile);
+         if (file.fail()) WALBERLA_ABORT("Error: " << setup.checkpointConfigFile << " could not be opened!");
+         file >> beginTimeStep;
+         file >> welfordCtr;
+         file.close();
+      }
+      mpi::broadcastObject(beginTimeStep);
+      mpi::broadcastObject(welfordCtr);
+
+      WALBERLA_LOG_INFO_ON_ROOT("Successfully read config parameters from checkpoint config file:")
+      WALBERLA_LOG_INFO_ON_ROOT(" - beginTimeStep = " << beginTimeStep)
+   }
+
    const StorageSpecification_T StorageSpec = StorageSpecification_T();
    // Creating fields
    uint_t step = 0;
    WALBERLA_LOG_INFO_ON_ROOT(++step << ". Allocating data")
    IDs ids;
-   ids.pdfField  = lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, setup.numGhostLayers, field::fzyx);
-
    auto allocator = make_shared< gpu::HostFieldAllocator<macroFieldType> >(); // use pinned memory allocator for faster CPU-GPU memory transfers
+
+   auto defaultFieldSize = [](const shared_ptr< StructuredBlockStorage > & sbs, IBlock * const block){
+      return Vector3<uint_t>( sbs->getNumberOfXCells( *block ), sbs->getNumberOfYCells( *block ), sbs->getNumberOfZCells( *block ) );
+   };
+
+   auto inflowFieldSize = [](const shared_ptr< StructuredBlockStorage > & sbs, IBlock * const block){
+      return Vector3<uint_t>( uint_c(1), sbs->getNumberOfYCells( *block ), sbs->getNumberOfZCells( *block ) );
+   };
+
+   if (setup.loadSnapshot){
+      shared_ptr< lbm_generated::internal::PdfFieldHandling< StorageSpecification_T > > pdfFieldDataHandling =
+         make_shared< lbm_generated::internal::PdfFieldHandling< StorageSpecification_T > >( blocks, StorageSpec, setup.numGhostLayers, field::fzyx);
+
+      ids.pdfField = blocks->getBlockStorage().loadBlockData(setup.pdfFieldFile, pdfFieldDataHandling, "pdfs");
+
+      if(welfordSamplingFrequency > 0){
+
+         std::shared_ptr< VelocityFieldDH_T > meanVelocityDataHandling =
+         std::make_shared< VelocityFieldDH_T >(blocks, setup.numGhostLayers, macroFieldType(0.0), field::fzyx, defaultFieldSize, allocator);
+
+         std::shared_ptr< TensorFieldDH_T > sosDataHandling =
+         std::make_shared< TensorFieldDH_T >(blocks, setup.numGhostLayers, macroFieldType(0.0), field::fzyx, defaultFieldSize, allocator);
+
+         ids.meanVelocityField = blocks->getBlockStorage().loadBlockData(setup.meanVelocityFile, meanVelocityDataHandling, "mean velocity");
+         ids.sosField = blocks->getBlockStorage().loadBlockData(setup.sosFile, sosDataHandling, "sos velocity");
+
+      }
+
+      std::shared_ptr< VelocityFieldDH_T > inflowSliceDataHandling =
+      std::make_shared< VelocityFieldDH_T >(blocks, setup.numGhostLayers, macroFieldType(0.0), field::fzyx, inflowFieldSize);
+      ids.velocityFieldInflowSlice = blocks->getBlockStorage().loadBlockData(setup.inflowFile, inflowSliceDataHandling, "velocity Inflow");
+   }
+   else{
+      ids.pdfField  = lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, setup.numGhostLayers, field::fzyx);
+      ids.velocityFieldInflowSlice    = field::addToStorage< VelocityField_T >(blocks, "velocity Inflow", inflowFieldSize, macroFieldType(0.0), field::fzyx, setup.numGhostLayers);
+   }
+
    ids.velocityField = field::addToStorage< VelocityField_T >(blocks, "vel", macroFieldType(0.0), field::fzyx, setup.numGhostLayers, allocator);
    ids.densityField  = field::addToStorage< ScalarField_T >(blocks, "density", macroFieldType(1.0), field::fzyx, setup.numGhostLayers, allocator);
    ids.flagField     = field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field", setup.numGhostLayers);
@@ -293,38 +351,43 @@ int main(int argc, char** argv){
    ids.densityFieldGPU  = gpu::addGPUFieldToStorage< ScalarField_T >(blocks, ids.densityField, "velocity on GPU", true);
    ids.flagFieldGPU     = gpu::addGPUFieldToStorage< FlagField_T >(blocks, ids.flagField, "flag on GPU", true);
 
-   std::function< Vector3< uint_t > (const shared_ptr< StructuredBlockStorage >&, IBlock * const) > getInflowSliceSize = inflowSliceSize;
-   ids.velocityFieldInflowSlice    = field::addToStorage< VelocityField_T >(blocks, "velocity Inflow", getInflowSliceSize, macroFieldType(0.0), field::fzyx, setup.numGhostLayers);
    ids.velocityFieldInflowSliceGPU = gpu::addGPUFieldToStorage< VelocityField_T >(blocks, ids.velocityFieldInflowSlice, "velocity Inflow on GPU", true);
 
    const Cell innerOuterSplit = Cell(parameters.getParameter< Vector3<cell_idx_t> >("innerOuterSplit", Vector3<cell_idx_t>(1, 1, 1)));
    SweepCollection_T sweepCollection(blocks, ids.densityFieldGPU, ids.pdfFieldGPU, ids.velocityFieldGPU, setup.drivingForce, setup.omega, innerOuterSplit);
+   sweepCollection.initialiseBlockPointer();
    WALBERLA_LOG_INFO_ON_ROOT(++step << ". Initialising data")
 
-   if(setup.initialiseWithlatticeVelocity){
+   if (!setup.loadSnapshot){
+      if(setup.initialiseWithlatticeVelocity){
+         for (auto& block : *blocks){
+            auto velocity = block.getData< VelocityField_T >(ids.velocityField);
+            WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(velocity,
+               velocity->get(x, y, z, 0) = setup.latticeVelocity;
+            )
+         }
+      }
+
+      if(parameters.getParameter< bool >("initialiseWithTurbulence", false)){
+         setVelocityFieldsAsmuth(blocks, setup, ids);
+      }
+
+      gpu::fieldCpy< gpu::GPUField< VelocityField_T::value_type >, VelocityField_T >(blocks, ids.velocityFieldGPU, ids.velocityField);
       for (auto& block : *blocks){
-         auto velocity = block.getData< VelocityField_T >(ids.velocityField);
-         WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(velocity,
-            velocity->get(x, y, z, 0) = setup.latticeVelocity;
+         sweepCollection.initialise(&block, cell_idx_c(setup.numGhostLayers - 1));
+         auto velocityInflow = block.getData< VelocityField_T >(ids.velocityFieldInflowSlice);
+         WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(velocityInflow,
+            velocityInflow->get(x, y, z, 0) = setup.latticeVelocity;
          )
       }
-   }
 
-   if(parameters.getParameter< bool >("initialiseWithTurbulence", false)){
-      setVelocityFieldsAsmuth(blocks, setup, ids);
+      gpu::fieldCpy< gpu::GPUField< VelocityField_T::value_type >, VelocityField_T >(blocks, ids.velocityFieldInflowSliceGPU, ids.velocityFieldInflowSlice);
+   
    }
-
-   gpu::fieldCpy< gpu::GPUField< VelocityField_T::value_type >, VelocityField_T >(blocks, ids.velocityFieldGPU, ids.velocityField);
-   for (auto& block : *blocks){
-      sweepCollection.initialise(&block, cell_idx_c(setup.numGhostLayers - 1));
-      auto velocityInflow = block.getData< VelocityField_T >(ids.velocityFieldInflowSlice);
-      WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(velocityInflow,
-         velocityInflow->get(x, y, z, 0) = setup.latticeVelocity;
-      )
+   else{
+      for (auto& block : *blocks)
+         sweepCollection.calculateMacroscopicParameters(&block, cell_idx_c(setup.numGhostLayers - 1));
    }
-   sweepCollection.initialiseBlockPointer();
-   gpu::fieldCpy< gpu::GPUField< VelocityField_T::value_type >, VelocityField_T >(blocks, ids.velocityFieldInflowSliceGPU, ids.velocityFieldInflowSlice);
-
 
    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    ///                                      LB SWEEPS AND BOUNDARY HANDLING                                       ///
@@ -378,7 +441,7 @@ int main(int argc, char** argv){
       WALBERLA_LOG_INFO_ON_ROOT(++step << ". Writing TPMS as surface file")
       auto mesh = postprocessing::flagFieldToSurfaceMesh< FlagField_T >(blocks, ids.flagField, setup.obstacleFlag, true);
       WALBERLA_EXCLUSIVE_WORLD_SECTION(0){
-         geometry::writeMesh("TPMS.obj", *mesh);
+         geometry::writeMesh("output/TPMS.obj", *mesh);
       }
    }
    gpu::fieldCpy< GPUFlagField_T, FlagField_T >(blocks, ids.flagFieldGPU, ids.flagField);
@@ -413,43 +476,50 @@ int main(int argc, char** argv){
       LBMMeshRefinement.addPostBoundaryHandlingBlockFunction(evaluation->forceCalculationFunctor());
    }
 
-   const uint_t forceUpdateFrequency = parameters.getParameter< uint_t >("forceUpdateFrequency", 0);
-   auto bvc = std::make_shared<BulkVelocityCalculator>(blocks, ids, setup, forceUpdateFrequency);
-   if(forceUpdateFrequency > 0){
-      auto updateForce = [&](){
-      if(timeloop.getCurrentTimeStep() % forceUpdateFrequency  == 0) {
-            const real_t correctedDrivingForce = bvc->getDrivingForce();
-            if(parameters.getParameter< bool >("adaptDrivingForce", false)){
-               sweepCollection.setFx(correctedDrivingForce);
+   shared_ptr< BulkVelocityCalculator > bvc;
+   if(config->getNumBlocks("ForcingParameters") > 0){
+      WALBERLA_LOG_INFO_ON_ROOT(++step << ". Add bulkvelocity calculator")
+      bvc = std::make_shared<BulkVelocityCalculator>(blocks, ids, setup, config->getOneBlock("ForcingParameters"));
+
+      if(bvc->forceUpdateFrequency() > 0){
+         auto updateForce = [&](){
+         if(timeloop.getCurrentTimeStep() % bvc->forceUpdateFrequency()  == 0) {
+               const real_t correctedDrivingForce = bvc->getDrivingForce();
+               if(bvc->adaptDrivingForce()){
+                  WALBERLA_LOG_DEVEL_VAR(correctedDrivingForce)
+                  sweepCollection.setFx(correctedDrivingForce);
+               }
             }
-            
-         }
-      };
-      timeloop.addFuncBeforeTimeStep(updateForce, "Update force");
+         };
+         timeloop.addFuncBeforeTimeStep(updateForce, "Update force");
+      }
    }
 
+   std::function<void(IBlock*)> copyPDFs = gpu::fieldCpyFunctor< PdfField_T, gpu::GPUField< PdfField_T::value_type > >(ids.pdfField, ids.pdfFieldGPU);
+   std::function<void(IBlock*)> copyVelocityInflow = gpu::fieldCpyFunctor< VelocityField_T, gpu::GPUField< VelocityField_T::value_type > >(ids.velocityFieldInflowSlice, ids.velocityFieldInflowSliceGPU);
    std::function<void(IBlock*)> copyVelocity = gpu::fieldCpyFunctor< VelocityField_T, gpu::GPUField< VelocityField_T::value_type > >(ids.velocityField, ids.velocityFieldGPU);
    std::function<void(IBlock*)> copyDensity  = gpu::fieldCpyFunctor< ScalarField_T, gpu::GPUField< ScalarField_T::value_type > >(ids.densityField, ids.densityFieldGPU);
    std::function<void(IBlock*)> copyMeanVelocity;
    std::function<void(IBlock*)> copySOSField;
 
-   const uint_t welfordSamplingStart = parameters.getParameter< uint_t >("welfordSamplingStart", 0);
-   const uint_t welfordSamplingFrequency = parameters.getParameter< uint_t >("welfordSamplingFrequency", 0);
    std::shared_ptr<WelfordSweep_T> welfordSweep;
 
    if(welfordSamplingFrequency > 0){
 
       WALBERLA_LOG_INFO_ON_ROOT(++step << ". Adding welford algorithm to evaluate mean field quantities")
 
-      ids.meanVelocityField = field::addToStorage< VelocityField_T >(blocks, "mean velocity", macroFieldType(0.0), field::fzyx, setup.numGhostLayers, allocator);
-      ids.sosField = field::addToStorage< TensorField_T >(blocks, "sos velocity", macroFieldType(0.0), field::fzyx, setup.numGhostLayers, allocator);
+      if (!setup.loadSnapshot){
+         ids.meanVelocityField = field::addToStorage< VelocityField_T >(blocks, "mean velocity", macroFieldType(0.0), field::fzyx, setup.numGhostLayers, allocator);
+         ids.sosField = field::addToStorage< TensorField_T >(blocks, "sos velocity", macroFieldType(0.0), field::fzyx, setup.numGhostLayers, allocator);
+      }
+      
       ids.meanVelocityFieldGPU = gpu::addGPUFieldToStorage< VelocityField_T >(blocks, ids.meanVelocityField, "mean velocity on GPU", true);
       ids.sosFieldGPU = gpu::addGPUFieldToStorage< TensorField_T >(blocks, ids.sosField, "sos field on GPU", true);
 
       copyMeanVelocity = gpu::fieldCpyFunctor< VelocityField_T, gpu::GPUField< VelocityField_T::value_type > >(ids.meanVelocityField, ids.meanVelocityFieldGPU);
       copySOSField     = gpu::fieldCpyFunctor< TensorField_T, gpu::GPUField< TensorField_T::value_type > >(ids.sosField, ids.sosFieldGPU);
 
-      welfordSweep = make_shared<WelfordSweep_T>(ids.meanVelocityFieldGPU, ids.sosFieldGPU, ids.velocityFieldGPU, real_c(0.0));
+      welfordSweep = make_shared<WelfordSweep_T>(ids.meanVelocityFieldGPU, ids.sosFieldGPU, ids.velocityFieldGPU, real_c(welfordCtr));
 
       auto welfordUpdate = [&](){
          if(timeloop.getCurrentTimeStep() > welfordSamplingStart  && (timeloop.getCurrentTimeStep() % welfordSamplingFrequency) == 0) {
@@ -469,7 +539,7 @@ int main(int argc, char** argv){
 
    auto inflow = std::make_shared<Inflow>(blocks, setup, ids);
    auto probe = std::make_shared<Probe>(blocks, setup, ids, config->getOneBlock("Probes"));
-
+   probe->setInitialTimestep(beginTimeStep);
 
 /*
    const real_t lenght = 8.0;
@@ -487,22 +557,32 @@ int main(int argc, char** argv){
    // }
    // probe->initialise();
 
-   
+   probe->addPoint(finalDomain.center());
 
-   for(uint_t i = 0; i < blocks->getXSize() * blocks->getNumberOfXCells(); ++i){
-       probe->addPoint(Vector3<real_t>(real_c(i) * setup.dxC, finalDomain.center()[1], finalDomain.center()[2] ));
-   }
+   const real_t xStart = finalDomain.xMin();
+   const uint_t spacing = uint_c(4);
+   const uint_t nx = uint_c(finalDomain.xSize() / setup.dxC);
 
-/*
-   for(uint_t i = 0; i < blocks->getYSize() * blocks->getNumberOfYCells(); ++i){
-       probe->addPoint(Vector3<real_t>(finalDomain.center()[0], real_c(i) * setup.dxC, finalDomain.center()[2] ));
+   for(uint_t j = 1; j < 2; ++j){
+      for(uint_t i = 0; i <= nx; i+=spacing){
+         probe->addPoint(Vector3<real_t>(xStart + real_c(i) * setup.dxC, real_c(2.0) + real_c(j) * real_c(2.0), finalDomain.center()[2] ));
+      }
    }
 
-   for(uint_t i = 0; i < blocks->getZSize() * blocks->getNumberOfZCells(); ++i){
-       probe->addPoint(Vector3<real_t>(finalDomain.center()[0], finalDomain.center()[1], real_c(i) * setup.dxC));
+   const real_t yStart = finalDomain.yMin();
+   const uint_t ny = uint_c(finalDomain.ySize() / setup.dxC);
+
+   for(uint_t j = 2; j < 3; ++j){
+      for(uint_t i = 0; i <= ny; i+=spacing){
+         probe->addPoint(Vector3<real_t>(real_c(2.0) + real_c(j) * real_c(2.0), yStart + real_c(i) * setup.dxC, finalDomain.center()[2] ));
+      }
    }
 
-*/
+   // for(uint_t i = 0; i < blocks->getYSize() * blocks->getNumberOfYCells(); ++i){
+   //     probe->addPoint(Vector3<real_t>(finalDomain.center()[0], real_c(i) * setup.dxC, finalDomain.center()[2] ));
+   // }
+
+
    probe->initialise();
 
    
@@ -534,9 +614,10 @@ int main(int argc, char** argv){
       vtkOutput->addBeforeFunction([&]() {
 
          for (auto& block : *blocks){
-            if (VTKWriter.getParameter< bool >("velocity")){copyVelocity(&block);}
-            if (VTKWriter.getParameter< bool >("density")){copyDensity(&block);}
-            if (VTKWriter.getParameter< bool >("meanVelocity") && copyMeanVelocity){copyMeanVelocity(&block);}
+            if (VTKWriter.getParameter< bool >("velocity", false)){copyVelocity(&block);}
+            if (VTKWriter.getParameter< bool >("density", false)){copyDensity(&block);}
+            if (VTKWriter.getParameter< bool >("meanVelocity", false) && copyMeanVelocity){copyMeanVelocity(&block);}
+            if (VTKWriter.getParameter< bool >("reynoldsstress", false) && copySOSField){copySOSField(&block);}
          }
 
          if(lastStepAsVolumeFile && timeloop.getCurrentTimeStep() == setup.timesteps - 1){
@@ -565,26 +646,29 @@ int main(int argc, char** argv){
          }
       }
 
-      if (VTKWriter.getParameter< bool >("velocity")){
+      if (VTKWriter.getParameter< bool >("velocity", false)){
          if (VTKWriter.getParameter< bool >("writeVelocityAsMagnitude")){
-            auto velWriter = make_shared< field::VTKMagnitudeWriter< VelocityField_T, float32 > >(ids.velocityField, "velocityMagnitude");
+            auto velWriter = make_shared< field::VTKMagnitudeWriter< VelocityField_T, float32 > >(ids.velocityField, "velocityMagnitude", setup.conversionFactor);
             vtkOutput->addCellDataWriter(velWriter);
          }
          else{
-            auto velWriter = make_shared< field::VTKWriter< VelocityField_T, float32 > >(ids.velocityField, "velocity");
+            auto velWriter = make_shared< field::VTKWriter< VelocityField_T, float32 > >(ids.velocityField, "velocity", setup.conversionFactor);
             vtkOutput->addCellDataWriter(velWriter);
          }
       }
       if (VTKWriter.getParameter< bool >("meanVelocity", false) && ids.meanVelocityField){
          auto meanVelWriter = make_shared< field::VTKWriter< VelocityField_T, float32 > >(ids.meanVelocityField, "meanVelocity");
          vtkOutput->addCellDataWriter(meanVelWriter);
-
       }
-      if (VTKWriter.getParameter< bool >("density")){
+      if (VTKWriter.getParameter< bool >("reynoldsstress", false) && ids.sosField){
+         auto sosWriter = make_shared< field::VTKWriter< TensorField_T, float32 > >(ids.sosField, "RR");
+         vtkOutput->addCellDataWriter(sosWriter);
+      }
+      if (VTKWriter.getParameter< bool >("density", false)){
          auto densityWriter = make_shared< field::VTKWriter< ScalarField_T, float32 > >(ids.densityField, "density");
          vtkOutput->addCellDataWriter(densityWriter);
       }
-      if (VTKWriter.getParameter< bool >("flag")){
+      if (VTKWriter.getParameter< bool >("flag", false)){
          auto flagWriter = make_shared< field::VTKWriter< FlagField_T > >(ids.flagField, "flag");
          vtkOutput->addCellDataWriter(flagWriter);
       }
@@ -608,6 +692,57 @@ int main(int argc, char** argv){
          "Stability check");
    }
 
+   if(setup.storeSnapshot){
+      auto snapShotFunction = [&](){
+         if(timeloop.getCurrentTimeStep() > uint_c(0)  && ( (timeloop.getCurrentTimeStep() + 1) % setup.snapshotFrequency) == 0) {
+
+            for (auto& block : *blocks){
+               copyPDFs(&block);
+               copyVelocityInflow(&block);
+               if(copyMeanVelocity){copyMeanVelocity(&block);}
+               if(copySOSField){copySOSField(&block);}
+            }
+            
+            blocks->saveBlockData(setup.pdfFieldFile + "_tmp", ids.pdfField);
+            blocks->saveBlockData(setup.inflowFile + "_tmp", ids.velocityFieldInflowSlice);
+
+            if(copyMeanVelocity){blocks->saveBlockData(setup.meanVelocityFile + "_tmp", ids.meanVelocityField);}
+            if(copySOSField){blocks->saveBlockData(setup.sosFile + "_tmp", ids.sosField);}
+
+            WALBERLA_ROOT_SECTION(){
+               uint_t welfordCounter = uint_c(0);
+               if(welfordSweep)
+                  welfordCounter = uint_c(welfordSweep->getCounter());
+
+               std::string tmpCheckpointConfigFile = setup.checkpointConfigFile + "_tmp";
+               std::ofstream file;
+               file.open(tmpCheckpointConfigFile.c_str());
+
+               file << std::setprecision(16);
+               file << timeloop.getCurrentTimeStep() + 1 << "\n";
+               file << welfordCounter << "\n";
+               file.close();
+
+               if(bvc){
+                  bvc->writeStateToFile();
+               }
+            }
+
+            WALBERLA_MPI_BARRIER();
+
+            WALBERLA_ROOT_SECTION(){
+               renameFile(setup.pdfFieldFile + "_tmp", setup.pdfFieldFile);
+               renameFile(setup.inflowFile + "_tmp", setup.inflowFile);
+               if(copyMeanVelocity){renameFile(setup.meanVelocityFile + "_tmp", setup.meanVelocityFile);}
+               if(copySOSField){renameFile(setup.sosFile + "_tmp", setup.sosFile);}
+               renameFile(setup.checkpointConfigFile + "_tmp", setup.checkpointConfigFile);
+               }
+         };
+      };
+      timeloop.addFuncAfterTimeStep(snapShotFunction, "Snapshot writing");
+   }
+
+
    WALBERLA_MPI_BARRIER()
 #if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT)
    WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
@@ -647,10 +782,10 @@ int main(int argc, char** argv){
          copySOSField(&block);
       }
 
-      const double ut = evaluation->getMeanFrictionVelocity();
+      const bool adaptForce = parameters.getParameter< bool >("adaptDrivingForce", false);
+
+      const double ut = adaptForce ? evaluation->getMeanFrictionVelocity() : setup.frictionVelocity;
       const double welfordCounter = welfordSweep->getCounter();
-      WALBERLA_LOG_DEVEL_VAR(ut)
-      WALBERLA_LOG_DEVEL_VAR(welfordCounter)
 
       plotter->writeResult(ut, welfordCounter);
    }
diff --git a/apps/showcases/PorousMediaGPU/PorousMediaGPU.py b/apps/showcases/PorousMediaGPU/PorousMediaGPU.py
index fbd9e7e4f1f3832d8cdffba57b7ecee7b4871100..3d8bb67a3bc2f68ad9905f3c91f5128bb81034bc 100644
--- a/apps/showcases/PorousMediaGPU/PorousMediaGPU.py
+++ b/apps/showcases/PorousMediaGPU/PorousMediaGPU.py
@@ -14,12 +14,17 @@ from pystencils_walberla import CodeGeneration, generate_info_header, generate_s
 from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator
 from lbmpy_walberla import RefinementScaling
 
+import pkg_resources
+
 info_header = """
 #pragma once
 #include <map>
 std::map<std::string, std::string> infoMap{{{{"stencil", "{stencil}"}},
                                            {{"streamingPattern", "{streaming_pattern}"}}, 
-                                           {{"collisionOperator", "{collision_operator}"}}}};
+                                           {{"collisionOperator", "{collision_operator}"}},
+                                           {{"forceModel", "{force_model}"}},
+                                           {{"FourthOrderCorrection", "{fourth_order_correction}"}},
+                                           {{"PythonEnv", "{python_env}"}}}};
 """
 
 omega = sp.symbols("omega")
@@ -54,13 +59,14 @@ with CodeGeneration() as ctx:
 
     # method_enum = Method.CENTRAL_MOMENT
     method_enum = Method.CUMULANT
+    force_model_enum = ForceModel.GUO
     lbm_config = LBMConfig(
         method=method_enum,
         stencil=stencil,
         relaxation_rate=omega,
         compressible=True,
         force=force_vector,
-        force_model=ForceModel.GUO,
+        force_model=force_model_enum,
         # fourth_order_correction=0.1 if method_enum == Method.CUMULANT and stencil.Q == 27 else False,
         streaming_pattern=streaming_pattern,
         output={'density': density_field, 'velocity': velocity_field}
@@ -122,11 +128,17 @@ with CodeGeneration() as ctx:
     # Info header containing correct template definitions for stencil and field
     generate_info_header(ctx, f'{name_prefix}InfoHeader',
                          field_typedefs=field_typedefs)
+    
+    installed_packages = pkg_resources.working_set
+    installed_packages_list = sorted([f"{i.key}=={i.version}" for i in installed_packages])
 
     infoHeaderParams = {
         'stencil': stencil.name.lower(),
         'streaming_pattern': streaming_pattern,
         'collision_operator': lbm_config.method.name.lower(),
+        'force_model': force_model_enum.name.lower(),
+        'fourth_order_correction': lbm_config.fourth_order_correction,
+        'python_env': installed_packages_list,
     }
 
     ctx.write_file(f"{name_prefix}StaticDefines.h", info_header.format(**infoHeaderParams))
\ No newline at end of file
diff --git a/apps/showcases/PorousMediaGPU/Probe.cpp b/apps/showcases/PorousMediaGPU/Probe.cpp
index aa06bcebc2bd55297a76378c6624f6d4ff7c8bdd..826946b4ae0da9752adc7804f12b57fe12f71c9e 100644
--- a/apps/showcases/PorousMediaGPU/Probe.cpp
+++ b/apps/showcases/PorousMediaGPU/Probe.cpp
@@ -26,8 +26,6 @@ void Probe::initialise(){
     if(probePoints_.empty()){return;}
 
     uint_t idx = 0;
-    std::vector<Vector3<real_t>> cellCenters;
- 
     for(auto p: probePoints_){
         IBlock* block = blocks_->getBlock(p);
         if(block != nullptr){
@@ -39,36 +37,33 @@ void Probe::initialise(){
                 Cell globalCell;
                 blocks_->transformBlockLocalToGlobalCell(globalCell, *block, localCell);
                 Vector3<real_t> gloablCellCenter = blocks_->getCellCenter(globalCell, level);
-                if(std::find(cellCenters.begin(), cellCenters.end(), gloablCellCenter) == cellCenters.end()){
-                    cellCenters.emplace_back(gloablCellCenter);
 
-                    globalPointsPerBlock_[block].emplace_back(std::make_pair(gloablCellCenter, idx));
-                    localCellsPerBlock_[block].emplace_back(localCell);
+                auto element = ProbeInfo(gloablCellCenter, localCell, idx);
+                if(std::find(probeData_[block].begin(), probeData_[block].end(), element) == probeData_[block].end()){
+                    probeData_[block].emplace_back(element);
                     idx++;
                 }
             }
         }
     }
 
-    if(globalPointsPerBlock_.empty()){return;}
-
-    for (auto const& it : localCellsPerBlock_){
+    for (auto const& it : probeData_){
         IBlock* block = it.first;
-        auto localCells = it.second;
+        auto probeInfos = it.second;
 
         auto * dataVector = block->getData< DataVector > ( probeDataID );
         auto & cpuCellVector = dataVector->cpuCellVector();
         auto & cpuDataVector = dataVector->cpuDataVector();
 
-        const uint_t n = localCells.size();
+        const uint_t n = probeInfos.size();
         cpuCellVector.resize(3 * n);
 
         uint_t i = 0;
-        for(const auto localCell: localCells){
+        for(const auto probeInfo: probeInfos){
 
-            cpuCellVector[i] = localCell[0];
-            cpuCellVector[i + n] = localCell[1];
-            cpuCellVector[i + 2*n] = localCell[2];
+            cpuCellVector[i] = probeInfo.localCell[0];
+            cpuCellVector[i + n] = probeInfo.localCell[1];
+            cpuCellVector[i + 2*n] = probeInfo.localCell[2];
             ++i;
         }
 
@@ -80,78 +75,73 @@ void Probe::initialise(){
         Vector3<uint32_t> numberBlocks;
 
         getBlockAndGridSize(s, threadPerBlock, numberBlocks);
-
-        gpuBlock_[block] = threadPerBlock;
-        gpuGrid_[block] = numberBlocks;
+        gpuBlocksAndThreads_[block] = std::make_pair(threadPerBlock, numberBlocks);
     }
-
-    extendSize[0] = 0;
-    selectStart[0] = 0;
-
     writeHeaderFiles();
 }
 
 void Probe::writeHeaderFiles(){
-
-    const hsize_t dsetSize[1]{ 1 };
-    const hsize_t maxSize[1]{ H5S_UNLIMITED };
-    const hsize_t chunkSize[1]{writeCounter_};
-
-    H5::DataSpace arrayDataSpace(1, dsetSize, maxSize);
-    H5::DSetCreatPropList props;
-    props.setChunk(1, chunkSize);
-
     const auto outputFolder = setup_.outputFolder;
     const auto fileStem = setup_.fileStem;
     const auto probeFolderString = outputFolder + "/" + fileStem + "Probes";
-
     filesystem::path probeFolder( probeFolderString );
+
     WALBERLA_ROOT_SECTION()
     {
-        if( filesystem::exists( probeFolder ))
-            filesystem::remove_all( probeFolder );
+        if(!setup_.loadSnapshot){
+            if( filesystem::exists( probeFolder ))
+                filesystem::remove_all( probeFolder );
 
-        if ( !filesystem::exists( probeFolder ) )
-            filesystem::create_directories( probeFolder );
+            if ( !filesystem::exists( probeFolder ) )
+                filesystem::create_directories( probeFolder );
+        }
     }
 
     WALBERLA_MPI_BARRIER()
+    if(probeData_.empty()){return;}
 
-    const auto fileName = probeFolderString + "/" + "ProbeRank_" + std::to_string(MPIManager::instance()->rank()) + ".h5";
-    filesystem::path hdfFilePath( fileName.c_str() );
+    if(!setup_.loadSnapshot){
+        const auto fileName = probeFolderString + "/" + "ProbeRank_" + std::to_string(MPIManager::instance()->rank()) + ".h5";
+        H5::FileAccPropList fileAccProps;
 
-    H5::FileAccPropList fileAccProps;
-    h5file_ = std::make_unique<H5::H5File>(hdfFilePath.c_str(), H5F_ACC_CREAT | H5F_ACC_RDWR, H5::FileCreatPropList::DEFAULT, fileAccProps);
+        const hsize_t dsetSize[1]{ 1 };
+        const hsize_t maxSize[1]{ H5S_UNLIMITED };
+        const hsize_t chunkSize[1]{writeCounter_};
+
+        H5::DataSpace arrayDataSpace(1, dsetSize, maxSize);
+        H5::DSetCreatPropList props;
+        props.setChunk(1, chunkSize);
 
-    H5::Group timeGroup = h5file_->createGroup("time");
-    timeGroup.createDataSet("t", io::h5Float(), arrayDataSpace, props);
+        auto h5file = H5::H5File(fileName.c_str(), H5F_ACC_CREAT | H5F_ACC_RDWR, H5::FileCreatPropList::DEFAULT, fileAccProps);
+        H5::Group timeGroup = h5file.createGroup("time");
+        timeGroup.createDataSet("t", io::h5Float(), arrayDataSpace, props);
 
-    for (auto const& it : globalPointsPerBlock_){
-        for(uint_t i = 0; i < it.second.size(); ++i){
-            const auto p = it.second[i].first;
-            const auto idx = it.second[i].second;
-            std::string probeName = "Probe_" + std::to_string(idx);
-            if (h5file_->exists(probeName)) { continue; }
-            H5::Group probeGroup = h5file_->createGroup(probeName);
+        for (auto const& it : probeData_){
+            for(uint_t i = 0; i < it.second.size(); ++i){
+                const auto p = it.second[i].globalPoint;
+                const auto idx = it.second[i].idx;
+                std::string probeName = "Probe_" + std::to_string(idx);
+                if (h5file.exists(probeName)) { continue; }
+                H5::Group probeGroup = h5file.createGroup(probeName);
 
-            hsize_t locationShape[1]{ 3 };
-            H5::DataSpace spacingAttrDspace(1, locationShape, locationShape);
-            H5::Attribute locationAttr = probeGroup.createAttribute("Location", io::h5FileFloat(), spacingAttrDspace);
+                hsize_t locationShape[1]{ 3 };
+                H5::DataSpace spacingAttrDspace(1, locationShape, locationShape);
+                H5::Attribute locationAttr = probeGroup.createAttribute("Location", io::h5FileFloat(), spacingAttrDspace);
 
-            float location[3]{float(p[0]), float(p[1]), float(p[2])};
-            locationAttr.write(io::h5Float(), location);
+                float location[3]{float(p[0]), float(p[1]), float(p[2])};
+                locationAttr.write(io::h5Float(), location);
 
-            for(auto name: dataSetNames){
-                probeGroup.createDataSet(name, io::h5Float(), arrayDataSpace, props);
+                for(auto name: dataSetNames){
+                    probeGroup.createDataSet(name, io::h5Float(), arrayDataSpace, props);
+                }
             }
         }
     }
 }
 
 void Probe::writeProbes(){
-    if(globalPointsPerBlock_.empty()){return;}
+    if(probeData_.empty()){return;}
 
-    extendSize[0] += executionCounter_;
     std::vector<float> timeData(executionCounter_);
     const real_t t = (real_c(totalExecutionCounter_ + frequency_ - uint_c(1)) * setup_.dt) - ( real_c(executionCounter_ * frequency_) * setup_.dt);
     for(uint_t i = 0; i < executionCounter_; ++i){
@@ -160,7 +150,7 @@ void Probe::writeProbes(){
 
     writeData(timeData, "time", "t");
 
-    for (auto const& it : globalPointsPerBlock_){
+    for (auto const& it : probeData_){
         IBlock* block = it.first;
         auto* dataVector = block->getData< DataVector > ( probeDataID );
         dataVector->syncCPU();
@@ -170,7 +160,7 @@ void Probe::writeProbes(){
     const uint_t nDatasets = dataSetNames.size();
 
     std::vector<float> tmp(executionCounter_);
-    for (auto const& it : globalPointsPerBlock_){
+    for (auto const& it : probeData_){
         IBlock* block = it.first;
 
         auto* dataVector = block->getData< DataVector > ( probeDataID );
@@ -179,7 +169,7 @@ void Probe::writeProbes(){
         const uint_t n = uint_c(dataVector->size());
         WALBERLA_CHECK_EQUAL(n, it.second.size())
         for(uint_t i = 0; i < n; ++i){
-            const auto idx = it.second[i].second;
+            const auto idx = it.second[i].idx;
 
             const std::string probeName = "Probe_" + std::to_string(idx);
             for(uint_t j = 0; j < nDatasets; ++j){
@@ -191,32 +181,43 @@ void Probe::writeProbes(){
             }
         }
     }
-
-    selectStart[0] += executionCounter_;
     executionCounter_ = 0;
 }
 
 void Probe::writeData(const std::vector<float>& data, const std::string& groupName, const std::string& dataSetName){
-    const hsize_t selectCount[1] { executionCounter_ };
+    const auto outputFolder = setup_.outputFolder;
+    const auto fileStem = setup_.fileStem;
+    const auto probeFolderString = outputFolder + "/" + fileStem + "Probes";
+    const auto fileName = probeFolderString + "/" + "ProbeRank_" + std::to_string(MPIManager::instance()->rank()) + ".h5";
+    H5::FileAccPropList fileAccProps;
+    auto h5file = H5::H5File(fileName.c_str(), H5F_ACC_RDWR, H5::FileCreatPropList::DEFAULT, fileAccProps);
 
-    auto group = h5file_->openGroup(groupName);
+    auto group = h5file.openGroup(groupName);
     H5::DataSet dataset = group.openDataSet(dataSetName);
 
-    dataset.extend(extendSize);
     H5::DataSpace dataspace = dataset.getSpace();
-    dataspace.selectHyperslab(H5S_SELECT_SET, selectCount, selectStart);
+    std::vector<hsize_t> dims(1);
+    dataspace.getSimpleExtentDims(dims.data(), NULL);
+
+    const hsize_t selectCount[1] { data.size() };
+    const hsize_t extendSize[1] { dims[0] + data.size() };
+
+    dataset.extend(extendSize);
+    H5::DataSpace newDataspace = dataset.getSpace();
+
+    newDataspace.selectHyperslab(H5S_SELECT_SET, selectCount, dims.data());
         
     H5::DataSpace memSpace(1, selectCount);
-    dataset.write(data.data(), io::h5Float(), memSpace, dataspace);
+    dataset.write(data.data(), io::h5Float(), memSpace, newDataspace);
 }
 
 void Probe::update(){    
     ++totalExecutionCounter_;
-    if (probePoints_.empty() || (totalExecutionCounter_ - uint_c(1)) % frequency_ != 0) return;
+    if (probePoints_.empty() || rampUpTime_ > totalExecutionCounter_ || (totalExecutionCounter_ - uint_c(1)) % frequency_ != 0) return;
 
     const float velocityScalingFactor = float_c(setup_.conversionFactor);
 
-    for (auto const& it : globalPointsPerBlock_){
+    for (auto const& it : probeData_){
         IBlock* block = it.first;
         auto density = block->getData< gpu::GPUField<double> >(ids_.densityFieldGPU);
         auto velocity = block->getData< gpu::GPUField<double> >(ids_.velocityFieldGPU);
@@ -226,7 +227,7 @@ void Probe::update(){
         float* gpuDataVector = dataVector->advanceGPUDataVector();
         const int64_t n = dataVector->size();
 
-        updateProbe(density, velocity, gpuCellVector, gpuDataVector, n, velocityScalingFactor, gpuBlock_[block], gpuGrid_[block]);
+        updateProbe(density, velocity, gpuCellVector, gpuDataVector, n, velocityScalingFactor, gpuBlocksAndThreads_[block].first, gpuBlocksAndThreads_[block].second);
     }
 
     ++executionCounter_;
diff --git a/apps/showcases/PorousMediaGPU/Probe.h b/apps/showcases/PorousMediaGPU/Probe.h
index 374547f88e0c8634cbb2fc2d8ac6c26a6841eef5..4bf483853332cb021b21b665223dde92ab47ccb0 100644
--- a/apps/showcases/PorousMediaGPU/Probe.h
+++ b/apps/showcases/PorousMediaGPU/Probe.h
@@ -36,13 +36,21 @@
 #include "Setup.h"
 #include "utilGPU.h"
 
-// #include <H5Cpp.h>
-
 namespace walberla{
 
 class Probe
 {
   public:
+
+  struct ProbeInfo{
+    Vector3<real_t> globalPoint;
+    Cell localCell;
+    uint_t idx;
+    ProbeInfo(Vector3<real_t> globalPoint_, Cell localCell_, uint_t idx_) : globalPoint(globalPoint_), localCell(localCell_), idx(idx_) {}
+    bool operator==(const ProbeInfo & o) const {return globalPoint == o.globalPoint;}
+  };
+
+
   class DataVector{
     public:
         DataVector() = default;
@@ -106,9 +114,9 @@ class Probe
   Probe(const std::shared_ptr< StructuredBlockForest >& blocks, const Setup& setup, const IDs& ids, const Config::BlockHandle& probeParameters)
       : blocks_(blocks), setup_(setup), ids_(ids), executionCounter_(uint_c(0)), totalExecutionCounter_(uint_c(0))
     {
-
-        frequency_ = probeParameters.getParameter< uint_t >("evaluationFrequency");
+        frequency_    = probeParameters.getParameter< uint_t >("evaluationFrequency");
         writeCounter_ = probeParameters.getParameter< uint_t >("writeCounter");
+        rampUpTime_   = probeParameters.getParameter< uint_t >("rampUpTime", 0);
 
         auto createProbeData = []( IBlock * const , StructuredBlockStorage * const ) { return new DataVector(); };
         probeDataID = blocks_->addStructuredBlockData< DataVector >( createProbeData, "Probe Data");
@@ -123,6 +131,10 @@ class Probe
 
   void initialise();
   void update();
+  void setInitialTimestep(const uint_t initialTimestep){
+    totalExecutionCounter_ = initialTimestep;
+  }
+
 
   private:
   const std::shared_ptr< StructuredBlockForest > blocks_;
@@ -134,19 +146,12 @@ class Probe
   uint_t writeCounter_;
   uint_t executionCounter_;
   uint_t totalExecutionCounter_;
+  uint_t rampUpTime_;
 
-  std::unique_ptr<H5::H5File> h5file_;
-
-  hsize_t extendSize[1];
-  hsize_t selectStart[1];
   const std::vector<std::string> dataSetNames{"rho", "ux", "uy", "uz"};
-
   std::vector<Vector3<real_t>> probePoints_;
-  std::map<IBlock*, std::vector<    std::pair< Vector3<real_t>, uint_t > > > globalPointsPerBlock_;
-  std::map<IBlock*, std::vector<Cell> > localCellsPerBlock_;
-
-  std::map<IBlock*, Vector3<uint32_t>> gpuBlock_;
-  std::map<IBlock*, Vector3<uint32_t>> gpuGrid_;
+  std::map<IBlock*, std::vector< ProbeInfo > > probeData_;
+  std::map<IBlock*, std::pair<Vector3<uint32_t>, Vector3<uint32_t>>> gpuBlocksAndThreads_;
 
   void writeHeaderFiles();
   void writeProbes();
diff --git a/apps/showcases/PorousMediaGPU/Setup.h b/apps/showcases/PorousMediaGPU/Setup.h
index 44ce970ebbb1f04bc075cbbaea35069f792a97cc..950234c9490dcc29abefebfedc43a79593e5ffc9 100644
--- a/apps/showcases/PorousMediaGPU/Setup.h
+++ b/apps/showcases/PorousMediaGPU/Setup.h
@@ -22,6 +22,7 @@
 #include "core/config/Config.h"
 #include "core/DataTypes.h"
 #include "core/math/Vector3.h"
+#include "core/waLBerlaBuildInfo.h"
 
 #include "lbm_generated/refinement/RefinementScaling.h"
 
@@ -44,7 +45,7 @@ struct Setup
       auto domainParameters  = config->getOneBlock("DomainSetup");
       auto logging           = config->getOneBlock("Logging");
       auto turbulence        = config->getOneBlock("Turbulence");
-
+      auto snapshot          = config->getOneBlock("Snapshot");
 
       blockForestFilestem = domainParameters.getParameter< std::string >("blockForestFilestem", "blockforest");
       numProcesses        = domainParameters.getParameter< uint_t >("numberProcesses");
@@ -73,6 +74,7 @@ struct Setup
       uc                    = turbulence.getParameter< Vector3< real_t > >("U_c");
 
       const real_t speedOfSound = real_c(real_c(1.0) / std::sqrt( real_c(3.0) ));
+      drivingForce = real_c(0.0);
 
       if(config->getNumBlocks("RoughWall") > 0){
          // kinViscosity                    = parameters.getParameter< real_t >("viscosity");
@@ -90,7 +92,8 @@ struct Setup
          omega      = real_c(real_c(1.0) / (real_c(3.0) * viscosity + real_c(0.5)));
 
          frictionVelocity = (reynoldsNumber * viscosity) / latticeReferenceLength;
-         drivingForce = calculateDrivingForce(frictionVelocity, latticeReferenceLength, latticeVelocity, latticeVelocity);
+         if(periodic[0])
+            drivingForce = calculateDrivingForce(frictionVelocity, latticeReferenceLength, latticeVelocity, latticeVelocity);
       }
       else if(config->getNumBlocks("genericPorousMatrix") > 0){
          auto gpm = config->getOneBlock("genericPorousMatrix");
@@ -106,7 +109,8 @@ struct Setup
 
          const real_t pressureGradient = parameters.getParameter< real_t >("pressureGradient");
          const real_t XYslice = real_c(rootBlocks[0] * cellsPerBlock[0] * rootBlocks[1] * cellsPerBlock[1]);
-         drivingForce = pressureGradient / XYslice;
+         if(periodic[0])
+            drivingForce = pressureGradient / real_c(cells[0] * cells[1]);
       }
       else{
          referenceVelocity = parameters.getParameter< real_t >("referenceVelocity");
@@ -115,7 +119,6 @@ struct Setup
          machNumber = latticeVelocity / speedOfSound;
          viscosity  = real_c((latticeVelocity * latticeReferenceLength) / reynoldsNumber);
          omega      = real_c(real_c(1.0) / (real_c(3.0) * viscosity + real_c(0.5)));
-         drivingForce = real_c(0.0);
 
          dt  = latticeVelocity / referenceVelocity * coarseMeshSize;
          kinViscosity = viscosity * coarseMeshSize * coarseMeshSize / dt;
@@ -154,11 +157,82 @@ struct Setup
       outputFolder                 = logging.getParameter< std::string >("outputFolder", "output");
       fileStem                     = logging.getParameter< std::string >("filestem", "data");
 
+      loadSnapshot        = snapshot.getParameter< bool >("loadSnapshot", false);
+      storeSnapshot       = snapshot.getParameter< bool >("storeSnapshot", false);
+      recreateBlockForest = snapshot.getParameter< bool >("recreateBlockForest", false);
+      snapshotFrequency   = snapshot.getParameter< uint_t >("snapshotFrequency", 0);
+      snapshotBaseFolder  = snapshot.getParameter< std::string >("snapshotBaseFolder", "snapshots");
+
+      checkpointConfigFile = snapshotBaseFolder + "/" + fileStem + "CheckpointConfig.file";
+      pdfFieldFile = snapshotBaseFolder + "/" + fileStem + "pdfs.file";
+      meanVelocityFile = snapshotBaseFolder + "/" + fileStem + "meanVelocity.file";
+      sosFile = snapshotBaseFolder + "/" + fileStem + "sos.file";
+      inflowFile = snapshotBaseFolder + "/" + fileStem + "inflow.file";
+
       WALBERLA_ROOT_SECTION(){
          if (!filesystem::is_directory(outputFolder) || !filesystem::exists(outputFolder)) { // Check if output folder exists
             filesystem::create_directory(outputFolder); // create output folder
          }
+
+         if (!filesystem::is_directory(snapshotBaseFolder) || !filesystem::exists(snapshotBaseFolder)) {
+            filesystem::create_directory(snapshotBaseFolder); 
+         }
+         
+         writeInfoFile(config, infoMap);
       }
+      WALBERLA_MPI_BARRIER()
+   }
+
+   void writeInfoFile(std::shared_ptr<Config>& config, std::map<std::string, std::string>& infoMap){
+      const auto fileName = outputFolder + "/" + fileStem + "MetaData.txt";
+      filesystem::path filePath( fileName.c_str() );
+      if( filesystem::exists( filePath ) )
+         std::remove( filePath.string().c_str() );
+
+      WALBERLA_CHECK_NOT_NULLPTR(config)
+      std::ofstream outfile( filePath.c_str() );
+
+      outfile << "- simulation parameters:"   << std::setprecision(16) <<
+      "\n   + collision model:       " << collisionModel <<
+      "\n   + stencil:               " << stencil <<
+      "\n   + streaming:             " << streamingPattern <<
+      "\n   + forceModel:            " << infoMap["forceModel"] <<
+      "\n   + FourthOrderCorrection: " << infoMap["FourthOrderCorrection"] <<
+      "\n   + compressible:          " << ( StorageSpecification_T::compressible ? "yes" : "no" ) <<
+      "\n- simulation properties:    "
+      "\n   + kin. viscosity:        " << kinViscosity << " [m^2/s] (on the coarsest grid)" <<
+      "\n   + viscosity LB:          " << viscosity  << " [dx*dx/dt] (on the coarsest grid)" <<
+      "\n   + omega:                 " << omega << " (on the coarsest grid)" <<
+      "\n   + omega:                 " << omegaF << " (on the finest grid)" <<
+      "\n   + inflow velocity:       " << referenceVelocity << " [m/s]" <<
+      "\n   + lattice velocity:      " << latticeVelocity << " [dx/dt]" <<
+      "\n   + friction velocity:     " << frictionVelocity << " [dx/dt]" <<
+      "\n   + Reynolds number:       " << reynoldsNumber <<
+      "\n   + Mach number:           " << machNumber <<
+      "\n   + driving force:         " << drivingForce <<
+      "\n   + dx (coarsest grid):    " << dxC << " [m]" <<
+      "\n   + dx (finest grid):      " << dxF << " [m]" <<
+      "\n   + dt (coarsest grid):    " << dt << " [s]" <<
+      "\n   + conversion Faktor:     " << conversionFactor <<
+      "\n   + #time steps:           " << timesteps << " (on the coarsest grid, " << ( real_c(1.0) / dt ) << " for 1s of real time)" <<
+      "\n   + simulation time:       " << ( real_c( timesteps ) * dt ) << " [s]"
+      "\n   + flow through cycles:   " << ( flowThroughCycles ) << "\n\n";
+
+      outfile << *config << "\n \n";
+
+      outfile << "- waLBerla Build Info:"  << 
+      "\n   + git SHA:          " << walberla::core::buildinfo::gitSHA1() <<
+      "\n   + build Type:       " << walberla::core::buildinfo::buildType() <<
+      "\n   + compiler Flags:   " << walberla::core::buildinfo::compilerFlags() <<
+      "\n   + build Machine:    " << walberla::core::buildinfo::buildMachine() <<
+      "\n   + source directory: " << walberla::core::buildinfo::sourceDir() <<
+      "\n   + binary directory: " << walberla::core::buildinfo::binaryDir() << "\n\n";
+
+      for (auto const& it : infoMap){
+         outfile << it.first << " " << it.second << "\n";
+      }
+
+      outfile.close();
    }
 
    std::string blockForestFilestem;
@@ -230,6 +304,18 @@ struct Setup
    real_t remainingTimeLoggerFrequency;
    std::string outputFolder;
    std::string fileStem;
+
+   bool loadSnapshot;
+   bool storeSnapshot;
+   bool recreateBlockForest;
+   uint_t snapshotFrequency;
+   std::string snapshotBaseFolder;
+
+   std::string checkpointConfigFile;
+   std::string pdfFieldFile;
+   std::string meanVelocityFile;
+   std::string sosFile;
+   std::string inflowFile;
 };
 
 }
\ No newline at end of file
diff --git a/apps/showcases/PorousMediaGPU/TPMS.h b/apps/showcases/PorousMediaGPU/TPMS.h
index e72a72a7e2e2865a7435dc62d6d68bac0ba4f177..32c3d5afc30db58e8b9202415571a301ff6fb1b3 100644
--- a/apps/showcases/PorousMediaGPU/TPMS.h
+++ b/apps/showcases/PorousMediaGPU/TPMS.h
@@ -56,6 +56,8 @@ class TPMS
       tpmsBeginn_ = tpmsParameters.getParameter< real_t >("start");
       tpmsEnd_    = tpmsParameters.getParameter< real_t >("end");
 
+      WALBERLA_LOG_DEVEL_VAR(waveNumber_)
+
       const std::string tpmsString = tpmsParameters.getParameter< std::string >("type");
 
       if (tpmsString.compare("gyroid") == 0){
diff --git a/apps/showcases/PorousMediaGPU/TPMS.prm b/apps/showcases/PorousMediaGPU/TPMS.prm
index abae5ca9eefb1840ad432781d72e1d0fd0a2cae3..6db87ac290346b4207383acb6c6e909b08f057ba 100644
--- a/apps/showcases/PorousMediaGPU/TPMS.prm
+++ b/apps/showcases/PorousMediaGPU/TPMS.prm
@@ -16,7 +16,7 @@ Parameters
     innerOuterSplit <1, 1, 1>;
 
     // GPU related Parameters, only used if GPU is enabled
-    gpuEnabledMPI false;
+    gpuEnabledMPI True;
 }
 
 Turbulence
diff --git a/apps/showcases/PorousMediaGPU/TPMSPeriodic.prm b/apps/showcases/PorousMediaGPU/TPMSPeriodic.prm
new file mode 100644
index 0000000000000000000000000000000000000000..bb947e4070074c94e97ee92fa726f4603a7c3677
--- /dev/null
+++ b/apps/showcases/PorousMediaGPU/TPMSPeriodic.prm
@@ -0,0 +1,124 @@
+Parameters
+{
+    reynoldsNumber 2500;
+
+    referenceVelocity 1.0;
+    referenceLength 1.0;
+    latticeVelocity 0.025;
+    initialiseWithlatticeVelocity false;
+
+    welfordSamplingStart 0;
+    welfordSamplingFrequency 1;
+
+    timesteps 10;
+
+    processMemoryLimit 512; // MiB
+    innerOuterSplit <1, 1, 1>;
+
+    // GPU related Parameters, only used if GPU is enabled
+    gpuEnabledMPI True;
+}
+
+Snapshot
+{
+    loadSnapshot       true;
+    storeSnapshot      true;
+    snapshotFrequency  10;
+    snapshotBaseFolder snapshots;
+}
+
+ForcingParameters
+{
+    forceUpdateFrequency 0;
+    adaptDrivingForce false;
+    // targetMeanVelocityMagnitude 0.02;
+    proportionalGain            2e-4;
+    derivativeGain              1e-6;
+    integralGain                2e-4;
+    maxRamp                     1e-4;
+}
+
+Turbulence
+{
+    useGrid false;
+    turbulentInflow true;
+    U_rms  0.025;
+    characteristicLength 2e-4;
+    numberOfFourrierModes 1000;
+    U_c < 1, 1, 1 >;
+}
+
+
+TPMS
+{
+    waveNumber 18.84955592;
+    scaling 1.0;
+    strut 0.75;
+    start 0.0;
+    end 9.0;
+    type gyroid;
+}
+
+//! [domainSetup]
+DomainSetup
+{
+    domainSize    < 9, 1, 1 >;
+    cellsPerBlock < 270, 30, 30 >;
+    // cellsPerBlock < 2700, 300, 300 >;
+    // cellsPerBlock < 768, 96, 96 >;
+    // cellsPerBlock < 1536, 192, 192 >;
+    blocks    < 1, 1, 1 >;
+    periodic    < true, true, true >;
+    refinementLevels 0;
+    numberProcesses 1; // This is for load balancing, overwritten if more than one proc is used
+    blockForestFilestem porousMediaBlockForest;
+}
+//! [domainSetup]
+
+Boundaries
+{
+
+}
+
+StabilityChecker
+{
+    checkFrequency 0;
+    streamOutput   false;
+    vtkOutput      true;
+}
+
+VTKWriter
+{
+    vtkWriteFrequency 10;
+    vtkGhostLayers 0;
+    velocity true;
+    writeVelocityAsMagnitude false;
+    meanVelocity true;
+    density false;
+    flag false;
+    writeOnlySlice true;
+    sliceThickness 1;
+    writeXZSlice false;
+    amrFileFormat false;
+    oneFilePerProcess false;
+    samplingResolution -1;
+    initialWriteCallsToSkip 0;
+    lastStepAsVolumeFile true;
+}
+
+Logging
+{
+    logLevel info;  // info progress detail tracing
+    writeSetupForestAndReturn false; // When only one process is used the decomposition is writen and the program terminates
+    readSetupFromFile false;
+    remainingTimeLoggerFrequency 20; // in seconds
+    writeSurfaceMeshOfTPMS true;
+    filestem TPMS;
+}
+
+Probes
+{
+    evaluationFrequency 1;
+    writeCounter 1000; // after how many evaluations the results are written to file
+    filename probeData.h5;
+}
\ No newline at end of file
diff --git a/apps/showcases/PorousMediaGPU/util.cpp b/apps/showcases/PorousMediaGPU/util.cpp
index c1a374962b11f51ac25d569b2dffd67cebd9e4bc..8724e44a6ae3de0c8d9b0749058c9c56cc6f22fd 100644
--- a/apps/showcases/PorousMediaGPU/util.cpp
+++ b/apps/showcases/PorousMediaGPU/util.cpp
@@ -134,4 +134,12 @@ real_t calculateDrivingForce(const real_t targetFrictionVelocity, const real_t r
    return targetFrictionVelocity * targetFrictionVelocity / referenceLenght + (targetBulkVelocity - bulkVelocity) * targetBulkVelocity / referenceLenght;
 }
 
+void renameFile(const std::string& oldName, const std::string& newName)
+{
+   int result = std::rename(oldName.c_str(), newName.c_str());
+   if (result != 0)
+      WALBERLA_LOG_WARNING_ON_ROOT("Could not rename file " << oldName << " to " << newName << " with error code "
+                                                            << result);
+}
+
 } // namespace walberla
\ No newline at end of file
diff --git a/apps/showcases/PorousMediaGPU/util.h b/apps/showcases/PorousMediaGPU/util.h
index f2c76729e684f325ab1ec6e6283cf3c02edf5a34..6f1235156edd07b717171934b0d66a0cac4b6695 100644
--- a/apps/showcases/PorousMediaGPU/util.h
+++ b/apps/showcases/PorousMediaGPU/util.h
@@ -39,4 +39,6 @@ void getAllHemisphereCenters(std::vector<Vector3<real_t>>& centers, const real_t
 
 real_t calculateDrivingForce(const real_t targetFrictionVelocity, const real_t referenceLenght, const real_t targetBulkVelocity, const real_t bulkVelocity);
 
+void renameFile(const std::string& oldName, const std::string& newName);
+
 } // namespace walberla
\ No newline at end of file
diff --git a/apps/showcases/PorousMediaGPU/utilGPU.h b/apps/showcases/PorousMediaGPU/utilGPU.h
index 1d1bbc6c7a6662f7aa978367f51ab3b8b3d38c7c..99bbc9fe5e3461e0bb8eee9c232a9262087aa8c0 100644
--- a/apps/showcases/PorousMediaGPU/utilGPU.h
+++ b/apps/showcases/PorousMediaGPU/utilGPU.h
@@ -46,16 +46,51 @@ void updateProbe(gpu::GPUField<real_t>* density, gpu::GPUField<real_t>* velocity
                  const Vector3<uint32_t>& block, const Vector3<uint32_t>& grid);
 
 
-
 class BulkVelocityCalculator {
 public:
-    BulkVelocityCalculator(const std::shared_ptr<StructuredBlockForest> & blocks, const IDs& ids, const Setup& setup, const uint_t forceUpdateFrequency) 
-    : blocks_(blocks), setup_(setup), ids_(ids), forceUpdateFrequency_(forceUpdateFrequency), executionCounter_{0}
-    {
+    BulkVelocityCalculator(const std::shared_ptr<StructuredBlockForest> & blocks, const IDs& ids, const Setup& setup, const Config::BlockHandle& forceParameters) 
+    : blocks_(blocks), setup_(setup), ids_(ids) {
 #if __CUDA_ARCH__ < 600
-    WALBERLA_LOG_WARNING_ON_ROOT("Warning atomic add in double precision not supported. Falling back to software implementation")
+        WALBERLA_LOG_WARNING_ON_ROOT("Warning atomic add in double precision not supported. Falling back to software implementation")
 #endif
 
+        bool snapshotExists = false;
+        const std::string snapshotFilename = setup_.snapshotBaseFolder + "/" + setup_.fileStem + "DrivingForceSnapshot.file";
+        filesystem::path snapshotFilePath( fileName_.c_str() );
+        if( filesystem::exists( snapshotFilePath ) )
+            snapshotExists = true;
+
+
+        if(snapshotExists && setup_.loadSnapshot){
+            WALBERLA_LOG_INFO_ON_ROOT("Intialise PID Controller from Snapshot")
+            forceUpdateFrequency_ = forceParameters.getParameter< uint_t >("forceUpdateFrequency", 0);
+            adaptDrivingForce_ = forceParameters.getParameter< bool >("adaptDrivingForce", false);
+            readStateFromFile();
+        }
+        else{
+            forceUpdateFrequency_ = forceParameters.getParameter< uint_t >("forceUpdateFrequency", 0);
+            adaptDrivingForce_ = forceParameters.getParameter< bool >("adaptDrivingForce", false);
+            bulkVelocity_ = setup_.latticeVelocity;
+            drivingForce_ = setup_.drivingForce;
+
+            proportionalGain_ = forceParameters.getParameter< real_t >("proportionalGain");
+            derivateGain_     = forceParameters.getParameter< real_t >("derivativeGain");
+            integralGain_     = forceParameters.getParameter< real_t >("integralGain");
+            maxRamp_          = forceParameters.getParameter< real_t >("maxRamp");
+
+            minActuatingVariable_ = drivingForce_ - real_c(1000) * drivingForce_;
+            maxActuatingVariable_ = drivingForce_ + real_c(1000) * drivingForce_;
+
+            std::fill(errorHistory_, errorHistory_ + sizeof(errorHistory_) / sizeof(real_t), real_c(0.0));
+
+            if (integralGain_ > real_c(0.0))
+                errorIntegral_ = drivingForce_ / integralGain_;
+            else
+                errorIntegral_ = real_c(0.0);
+
+            executionCounter_ = uint_c(0);
+        }
+
         volume_ = 0.0;
         uint_t nBlocks = 0;
         for( auto it = blocks_->begin(); it != blocks_->end(); ++it ){
@@ -84,25 +119,70 @@ public:
         WALBERLA_GPU_CHECK(gpuMalloc((void **)&resultGPU_, sizeof(double) * resultCPU_.size() ));
         WALBERLA_GPU_CHECK(gpuMemset(resultGPU_, 0.0, sizeof(double) * resultCPU_.size()));
 
-        const auto outputFolder = setup_.outputFolder;
-        const auto fileStem = setup_.fileStem;
-        fileName_ = outputFolder + "/" + fileStem + "DrivingForce.csv"; 
+        fileName_ = setup_.outputFolder + "/" + setup_.fileStem + "DrivingForce.csv"; 
         filesystem::path drivingForceOuputFile( fileName_.c_str() );
 
         WALBERLA_ROOT_SECTION()
         {
-            if( filesystem::exists( drivingForceOuputFile ) )
-                std::remove( drivingForceOuputFile.string().c_str() );
-        
-            std::ofstream outfile( drivingForceOuputFile.c_str() );
-            outfile << "SEP=," << "\n";
+            if(!setup_.loadSnapshot){
+                if( filesystem::exists( drivingForceOuputFile ) )
+                    std::remove( drivingForceOuputFile.string().c_str() );
+            }
 
-            outfile << "timestep," << "force," << "U_b" << "\n";
-            outfile.close();
+            if(!filesystem::exists( drivingForceOuputFile ) ){
+                std::ofstream outfile( drivingForceOuputFile.c_str() );
+                outfile << "SEP=," << "\n";
+
+                outfile << "timestep," << "time," << "force," << "U_b" << "\n";
+                outfile.close();
+            }
         }
+    }
 
+    void writeStateToFile() const{
+        const std::string filename = setup_.snapshotBaseFolder + "/" + setup_.fileStem + "DrivingForceSnapshot.file";
+        std::ofstream file;
+        file.open(filename);
+        file << std::setprecision(16);
+        file << bulkVelocity_ << std::endl;
+        file << drivingForce_ << std::endl;
+        file << proportionalGain_ << std::endl;
+        file << derivateGain_ << std::endl;
+        file << integralGain_ << std::endl;
+        file << maxRamp_ << std::endl;
+        file << minActuatingVariable_ << std::endl;
+        file << maxActuatingVariable_ << std::endl;
+        file << errorHistory_[0] << std::endl;
+        file << errorHistory_[1] << std::endl;
+        file << errorHistory_[2] << std::endl;
+        file << errorIntegral_ << std::endl;
+        file << executionCounter_ << std::endl;
+        file.close();
     }
 
+    void readStateFromFile(){
+        const std::string filename = setup_.snapshotBaseFolder + "/" + setup_.fileStem + "DrivingForceSnapshot.file";
+        std::ifstream file;
+        file.open(filename);
+        file >> bulkVelocity_;
+        file >> drivingForce_;
+        file >> proportionalGain_;
+        file >> derivateGain_;
+        file >> integralGain_;
+        file >> maxRamp_;
+        file >> minActuatingVariable_;
+        file >> maxActuatingVariable_;
+        file >> errorHistory_[0];
+        file >> errorHistory_[1];
+        file >> errorHistory_[2];
+        file >> errorIntegral_;
+        file >> executionCounter_;
+        file.close();
+    }
+
+
+    uint_t forceUpdateFrequency() const {return forceUpdateFrequency_;}
+    bool adaptDrivingForce() const {return adaptDrivingForce_;}
     double bulkVelocity() const { return bulkVelocity_; }
     void setBulkVelocity(const double bulkVelocity) { bulkVelocity_ = bulkVelocity; }
     void calculateBulkVelocity();
@@ -110,19 +190,33 @@ public:
     real_t getDrivingForce() {
         calculateBulkVelocity();
 
-        const real_t latticeReferenceLength = setup_.referenceLength / setup_.dxC;
-        const real_t drivingForce = calculateDrivingForce(setup_.frictionVelocity, latticeReferenceLength, setup_.latticeVelocity, bulkVelocity_);
-        const real_t timestep = real_c(executionCounter_ * forceUpdateFrequency_) * setup_.dt;
+        const real_t error = setup_.latticeVelocity - bulkVelocity_;
+        const real_t d = (error + real_c(3.0) * errorHistory_[0] - real_c(3.0) * errorHistory_[1] - errorHistory_[2]) * (real_c(1.0) / real_c(6.0));
 
-        WALBERLA_ROOT_SECTION()
-        {
+        std::rotate(errorHistory_, errorHistory_ + 1, errorHistory_ + sizeof(errorHistory_) / sizeof(real_t));
+        errorHistory_[sizeof(errorHistory_) / sizeof(real_t) - size_t(1)] = error;
+
+        real_t newDrivingForce = proportionalGain_ * error + derivateGain_ * d + integralGain_ * errorIntegral_;
+
+        if (std::fabs(drivingForce_ - newDrivingForce) < maxRamp_){
+            errorIntegral_ += error;
+            newDrivingForce = proportionalGain_ * error + derivateGain_ * d + integralGain_ * errorIntegral_;
+        }
+
+        const real_t maxValue = std::min(drivingForce_ + maxRamp_, maxActuatingVariable_);
+        const real_t minValue = std::max(drivingForce_ - maxRamp_, minActuatingVariable_);
+
+        drivingForce_ = std::min(std::max(minValue, newDrivingForce), maxValue);
+
+        const real_t timestep = real_c(executionCounter_ * forceUpdateFrequency_) * setup_.dt;
+        WALBERLA_ROOT_SECTION(){
             std::ofstream outfile( fileName_.c_str(), std::ios_base::app );
-            outfile << timestep << "," << drivingForce << "," << bulkVelocity_ << "\n";
+            outfile << executionCounter_ * forceUpdateFrequency_ << "," << timestep << "," << drivingForce_ << "," << bulkVelocity_ << "\n";
             outfile.close();
         }
 
         executionCounter_++;
-        return drivingForce;
+        return drivingForce_;
     }
 
 private:
@@ -130,12 +224,27 @@ private:
     const Setup setup_;
     const IDs ids_;
 
-    const uint_t forceUpdateFrequency_;
+    uint_t forceUpdateFrequency_;
+    bool adaptDrivingForce_;
+
+    real_t bulkVelocity_;
+    real_t drivingForce_;
+
+    real_t proportionalGain_;
+    real_t derivateGain_;
+    real_t integralGain_;
+
+    real_t maxRamp_;
+    real_t minActuatingVariable_;
+    real_t maxActuatingVariable_;
+
+    real_t errorHistory_[3];
+    real_t errorIntegral_;
+
     uint_t executionCounter_;
     std::string fileName_;
 
     double volume_;
-    double bulkVelocity_{0.0};
 
     std::vector<double> resultCPU_;
     double* resultGPU_;
diff --git a/src/field/vtk/VTKWriter.h b/src/field/vtk/VTKWriter.h
index 436c4aab145e8f96c8cd2b14019f0ee121f884d3..3580fe16d7b1189d7d33ade898609e6b4795fa6c 100644
--- a/src/field/vtk/VTKWriter.h
+++ b/src/field/vtk/VTKWriter.h
@@ -103,12 +103,15 @@ class VTKWriter : public vtk::BlockCellDataWriter< OutputType,
                                                    VectorTrait<typename Field_T::value_type>::F_SIZE * Field_T::F_SIZE >
 {
 public:
-   using OutputTrait = VectorTrait<typename Field_T::value_type>;
-
+   using field_type = typename Field_T::value_type;
+   using OutputTrait = VectorTrait<field_type>;
    using base_t = vtk::BlockCellDataWriter<OutputType, OutputTrait::F_SIZE * Field_T::F_SIZE>; 
 
-   VTKWriter( const ConstBlockDataID bdid, const std::string& id ) :
-      base_t( id ), bdid_( bdid ), field_( nullptr ) {}
+   VTKWriter( const ConstBlockDataID bdid, const std::string& id) :
+      base_t( id ), bdid_( bdid ), field_( nullptr ), conversionFactor_( field_type(1) ) {}
+
+   VTKWriter( const ConstBlockDataID bdid, const std::string& id, const field_type conversionFactor) :
+      base_t( id ), bdid_( bdid ), field_( nullptr ), conversionFactor_(conversionFactor) {}
 
 protected:
 
@@ -124,19 +127,20 @@ protected:
       if ( Field_T::F_SIZE == 1 )
       {
          // common case that can be handled faster
-         return numeric_cast<OutputType>( OutputTrait::get( field_->get(x,y,z,0), uint_c(f) )  );
+         return numeric_cast<OutputType>( OutputTrait::get( field_->get(x,y,z,0), uint_c(f) ) * conversionFactor_ );
       }
       else
       {
          const cell_idx_t fField = f / cell_idx_c( OutputTrait::F_SIZE );
          const cell_idx_t fType  = f % cell_idx_c( OutputTrait::F_SIZE );
 
-         return numeric_cast<OutputType>( OutputTrait::get( field_->get(x,y,z,fField), uint_c(fType) )  );
+         return numeric_cast<OutputType>( OutputTrait::get( field_->get(x,y,z,fField), uint_c(fType) ) * conversionFactor_ );
       }
    }
 
    const ConstBlockDataID bdid_;
    const Field_T * field_;
+   const field_type conversionFactor_;
 
 };
 
@@ -144,12 +148,13 @@ template< typename Field_T, typename OutputType = float >
 class VTKMagnitudeWriter : public vtk::BlockCellDataWriter< OutputType, 1 >
 {
 public:
-
    using base_t = vtk::BlockCellDataWriter<OutputType, 1>;
    using type_t = typename Field_T::value_type;
 
    VTKMagnitudeWriter( const ConstBlockDataID bdid, const std::string& id ) :
-      base_t( id ), bdid_( bdid ), field_( nullptr ) {}
+      base_t( id ), bdid_( bdid ), field_( nullptr ), conversionFactor_( type_t(1) ) {}
+   VTKMagnitudeWriter( const ConstBlockDataID bdid, const std::string& id, const type_t conversionFactor ) :
+      base_t( id ), bdid_( bdid ), field_( nullptr ), conversionFactor_( conversionFactor ) {}
 
 protected:
 
@@ -162,13 +167,13 @@ protected:
    {
       WALBERLA_ASSERT_NOT_NULLPTR( field_ );
       if ( Field_T::F_SIZE == 1 ){
-         return numeric_cast<OutputType>( field_->get(x,y,z,0) );
+         return numeric_cast<OutputType>( field_->get(x,y,z,0) * conversionFactor_ );
       }
       else if (Field_T::F_SIZE == 2){
-         return numeric_cast<OutputType>( std::sqrt( field_->get(x,y,z,0)*field_->get(x,y,z,0) + field_->get(x,y,z,1)*field_->get(x,y,z,1) ));
+         return numeric_cast<OutputType>( std::sqrt( field_->get(x,y,z,0)*field_->get(x,y,z,0) + field_->get(x,y,z,1)*field_->get(x,y,z,1) ) * conversionFactor_);
       }
       else if (Field_T::F_SIZE == 3){
-         return numeric_cast<OutputType>( std::sqrt( field_->get(x,y,z,0)*field_->get(x,y,z,0) + field_->get(x,y,z,1)*field_->get(x,y,z,1) + field_->get(x,y,z,2)*field_->get(x,y,z,2) ));
+         return numeric_cast<OutputType>( std::sqrt( field_->get(x,y,z,0)*field_->get(x,y,z,0) + field_->get(x,y,z,1)*field_->get(x,y,z,1) + field_->get(x,y,z,2)*field_->get(x,y,z,2) ) * conversionFactor_);
       }
       else{
          WALBERLA_ABORT("Only fields with 1, 2 or 3 entries per cell are supported");
@@ -177,6 +182,7 @@ protected:
 
    const ConstBlockDataID bdid_;
    const Field_T * field_;
+   const type_t conversionFactor_;
 
 }; // class VTKMagnitudeWriter
 
diff --git a/src/vtk/VTKOutput.h b/src/vtk/VTKOutput.h
index 74f12f05a58e7aeeb16c53b85bbe161181b2b8e9..a8d67ce3eea03c93adafa33900e1e455491b8554 100644
--- a/src/vtk/VTKOutput.h
+++ b/src/vtk/VTKOutput.h
@@ -155,6 +155,7 @@ public:
 
    // 'filteredCells' contains block local cell coordinates of all cells that are written to file (if the cell is no excluded!)
    inline void addCellInclusionFilter( CellFilter f );
+   inline void removeCellInclusionFilters(){cellInclusionFunctions_.clear();}
    // 'filteredCells' contains block local cell coordinates of all cells that must not be written to file
    inline void addCellExclusionFilter( CellFilter f );