diff --git a/CMakeLists.txt b/CMakeLists.txt
index 10eef1bc9ca0c769726449ff618b36f310b76426..7b21f626822914ca3412b781d24442070a11048a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -70,6 +70,7 @@ option ( WALBERLA_BUILD_WITH_MPI            "Build with MPI"
 option ( WALBERLA_BUILD_WITH_METIS          "Build with metis graph partitioner"             OFF )
 option ( WALBERLA_BUILD_WITH_PARMETIS       "Build with ParMetis graph partitioner"          OFF )
 option ( WALBERLA_BUILD_WITH_FFTW           "Build with FFTW Fourier Transform library"      OFF )
+option ( WALBERLA_BUILD_WITH_HDF5           "Build with HDF5 support for I/O"                OFF )
 
 option ( WALBERLA_BUILD_WITH_GPROF          "Enables gprof"                                      )
 option ( WALBERLA_BUILD_WITH_GCOV           "Enables gcov"                                       )
@@ -1137,6 +1138,27 @@ endif ( )
 
 ############################################################################################################################
 
+############################################################################################################################
+##
+## HDF5
+##
+#############################################################################################################################
+
+if (WALBERLA_BUILD_WITH_HDF5)
+   find_package( HDF5 COMPONENTS C CXX REQUIRED )
+   if (NOT HDF5_FOUND)
+      message("HDF5 not found. HDF5 support is not possible.")
+      set ( WALBERLA_BUILD_WITH_HDF5 FALSE )
+   endif()
+
+   if(HDF5_IS_PARALLEL AND NOT WALBERLA_BUILD_WITH_MPI)
+      message("HDF5 was found. However, HDF5 is build with MPI support and waLBerla was build without MPI support. Either choose a serial HDF5 version or build waLBerla with MPI support.")
+      set ( WALBERLA_BUILD_WITH_HDF5 FALSE )
+   endif()
+endif ()
+
+############################################################################################################################
+
 
 
 ############################################################################################################################
diff --git a/apps/showcases/CMakeLists.txt b/apps/showcases/CMakeLists.txt
index c877683244060ac562f7162c7679285f62395b3a..9426ce664d2cba8b9f2e64b17e5d6776bfaa671f 100644
--- a/apps/showcases/CMakeLists.txt
+++ b/apps/showcases/CMakeLists.txt
@@ -11,7 +11,10 @@ if ( WALBERLA_BUILD_WITH_CODEGEN)
 
    add_subdirectory( Antidunes )
    add_subdirectory( FlowAroundSphere )
-   add_subdirectory( PorousMediaGPU )
+
+   if( WALBERLA_BUILD_WITH_HDF5)
+      add_subdirectory( PorousMediaGPU )
+   endif ()
 
    if (WALBERLA_BUILD_WITH_PYTHON)
       add_subdirectory( PhaseFieldAllenCahn )
diff --git a/apps/showcases/PorousMediaGPU/CMakeLists.txt b/apps/showcases/PorousMediaGPU/CMakeLists.txt
index c0622b0c32e0e062267ebe977c9e2aec6aecf2bd..9c59796b9b73f04c95ae4c6cdef70be476cb320c 100644
--- a/apps/showcases/PorousMediaGPU/CMakeLists.txt
+++ b/apps/showcases/PorousMediaGPU/CMakeLists.txt
@@ -19,10 +19,13 @@ waLBerla_generate_target_from_python(NAME PorousMediaGPUCodeGen
 if (WALBERLA_BUILD_WITH_CUDA OR WALBERLA_BUILD_WITH_HIP)
     waLBerla_add_executable ( NAME PorousMediaGPU
             FILES PorousMediaGPU.cpp GridGeneration.h Setup.h Types.h InitializerFunctions.cpp TPMS.cpp Inflow.cpp Probe.cpp util.cpp utilGPU.cu
-            DEPENDS blockforest boundary core field gpu lbm_generated postprocessing stencil timeloop vtk PorousMediaGPUCodeGen )
+            DEPENDS blockforest boundary core field gpu io lbm_generated postprocessing stencil timeloop vtk PorousMediaGPUCodeGen )
 else()
     waLBerla_add_executable ( NAME PorousMediaGPU
             FILES PorousMediaGPU.cpp GridGeneration.h Setup.h Types.h InitializerFunctions.cpp TPMS.cpp Inflow.cpp Probe.cpp util.cpp
-            DEPENDS blockforest boundary core field lbm_generated postprocessing stencil timeloop vtk PorousMediaGPUCodeGen )
+            DEPENDS blockforest boundary core field io lbm_generated postprocessing stencil timeloop vtk PorousMediaGPUCodeGen )
+
+# target_include_directories(PorousMediaGPU PUBLIC ${HDF5_CXX_INCLUDE_DIRS})
+# target_link_libraries(PorousMediaGPU PUBLIC ${HDF5_LIBRARIES} )
 
 endif(WALBERLA_BUILD_WITH_CUDA OR WALBERLA_BUILD_WITH_HIP)
diff --git a/apps/showcases/PorousMediaGPU/Inflow.cpp b/apps/showcases/PorousMediaGPU/Inflow.cpp
index 03ee9423c5d7d6fa80d95e18b06950c7ea9c8d43..7341db6add36c06e2f4dfa2feb07c2f19624a07b 100644
--- a/apps/showcases/PorousMediaGPU/Inflow.cpp
+++ b/apps/showcases/PorousMediaGPU/Inflow.cpp
@@ -76,7 +76,7 @@ void Inflow::updateGPU(){
 
         auto velocityGPU = block.template getData< GPUField_T >(ids_.velocityFieldInflowSliceGPU);
         updateVelocityPlane(velocityGPU, kappaGPU_, utGPU_, globalMinCorner_, blockMinCorner, uc_, 
-                            dx_, t, n_, latticeVelocity_, block_, grid_);
+                            dx_, t, n_, latticeVelocity_, nGhostLayers_, block_, grid_);
 
     }
     ++executionCounter_;
@@ -126,7 +126,7 @@ void Inflow::init(){
 
     globalMinCorner_ = blocks_->getDomain().min();
 
-    const Vector3<uint_t> s(n_, setup_.cellsPerBlock[1], setup_.cellsPerBlock[2]);
+    const Vector3<uint_t> s(n_, setup_.cellsPerBlock[1] + 2 * nGhostLayers_, setup_.cellsPerBlock[2] + 2 * nGhostLayers_);
     getBlockAndGridSize(s, block_, grid_);
 
     WALBERLA_GPU_CHECK(gpuMalloc((void **)&kappaGPU_, sizeof(real_t) * kappa_.size() ));
diff --git a/apps/showcases/PorousMediaGPU/Inflow.h b/apps/showcases/PorousMediaGPU/Inflow.h
index 7b5aa1efd1f24726fa70eacf9abd3e22877a5a38..d089bf3195207a1531e754ed47ff25077ef9e03e 100644
--- a/apps/showcases/PorousMediaGPU/Inflow.h
+++ b/apps/showcases/PorousMediaGPU/Inflow.h
@@ -77,6 +77,7 @@ class Inflow
    real_t dx_;
    Vector3<real_t> uc_;
    real_t latticeVelocity_;
+   const int64_t nGhostLayers_{1};
 
    real_t firstMode_;
    real_t lastMode_;
diff --git a/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp b/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp
index 3b91770a5db4eb61b6969eff0d885c49156f579d..b26fface76636d1ae507268f8853e470a522eda6 100644
--- a/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp
+++ b/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp
@@ -87,6 +87,8 @@
 #include "TPMS.h"
 #include "Types.h"
 
+
+
 #include "PorousMediaGPUStaticDefines.h"
 
 using namespace walberla;
@@ -106,8 +108,7 @@ using blockforest::communication::UniformBufferedScheme;
 using lbm_generated::UniformGeneratedPdfPackInfo;
 #endif
 
-inline Vector3< uint_t > inflowSliceSize( const shared_ptr< StructuredBlockStorage > & blocks, IBlock * const block )
-{
+inline Vector3< uint_t > inflowSliceSize( const shared_ptr< StructuredBlockStorage > & blocks, IBlock * const block ){
    return Vector3<uint_t>( uint_c(1), blocks->getNumberOfYCells( *block ), blocks->getNumberOfZCells( *block ) );
 }
 
@@ -144,6 +145,7 @@ int main(int argc, char** argv){
                               "\n   + lattice velocity:    " << setup.latticeVelocity << " [dx/dt]" <<
                               "\n   + Reynolds number:     " << setup.reynoldsNumber <<
                               "\n   + Mach number:         " << setup.machNumber <<
+                              "\n   + driving force:       " << setup.drivingForce <<
                               "\n   + dx (coarsest grid):  " << setup.dxC << " [m]" <<
                               "\n   + dt (coarsest grid):  " << setup.dt << " [s]" <<
                               "\n   + conversion Faktor:   " << setup.conversionFactor <<
@@ -181,11 +183,11 @@ int main(int argc, char** argv){
    ids.densityFieldGPU  = gpu::addGPUFieldToStorage< ScalarField_T >(blocks, ids.densityField, "velocity on GPU", true);
 
    std::function< Vector3< uint_t > (const shared_ptr< StructuredBlockStorage >&, IBlock * const) > getInflowSliceSize = inflowSliceSize;
-   ids.velocityFieldInflowSlice    = field::addToStorage< VelocityField_T >(blocks, "velocity Inflow", getInflowSliceSize, macroFieldType(0.0), field::fzyx, uint_c(0));
+   ids.velocityFieldInflowSlice    = field::addToStorage< VelocityField_T >(blocks, "velocity Inflow", getInflowSliceSize, macroFieldType(0.0), field::fzyx, uint_c(1));
    ids.velocityFieldInflowSliceGPU = gpu::addGPUFieldToStorage< VelocityField_T >(blocks, ids.velocityFieldInflowSlice, "velocity Inflow on GPU", true);
 
    const Cell innerOuterSplit = Cell(parameters.getParameter< Vector3<cell_idx_t> >("innerOuterSplit", Vector3<cell_idx_t>(1, 1, 1)));
-   SweepCollection_T sweepCollection(blocks, ids.pdfFieldGPU, ids.densityFieldGPU, ids.velocityFieldGPU, setup.omega, innerOuterSplit);
+   SweepCollection_T sweepCollection(blocks, ids.pdfFieldGPU, ids.densityFieldGPU, ids.velocityFieldGPU, setup.drivingForce, setup.omega, innerOuterSplit);
 
    WALBERLA_LOG_INFO_ON_ROOT(++step << ". Initialising data")
    for (auto& block : *blocks){
@@ -194,13 +196,17 @@ int main(int argc, char** argv){
    sweepCollection.initialiseBlockPointer();
 
    auto inflow = std::make_shared<Inflow>(blocks, setup, ids);
-
    auto probe = std::make_shared<Probe>(blocks, setup, ids, config->getOneBlock("Probes"));
 
-   // probe->addPoint(Vector3<real_t>(1.5, 0.5, 0.5 ));
-   // probe->addPoint(Vector3<real_t>(2.5, 0.5, 0.5 ));
-   // probe->addPoint(Vector3<real_t>(3.5, 0.5, 0.5));
+   const real_t lenght = 8.0;
+   const real_t res = 3.0;
+   const real_t start = 0.0;
 
+   const uint_t nx = uint_c(std::floor(lenght / (setup.dxC * res)));
+   for(uint_t i = 0; i < nx; ++i){
+      probe->addPoint(Vector3<real_t>(start + real_c(i) * (setup.dxC * res), 0.5, 0.5 ));
+   }
+   
    probe->initialise();
 
    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -224,10 +230,12 @@ int main(int argc, char** argv){
    }
 
    if(config->getNumBlocks("RoughWall") > 0){
+      WALBERLA_LOG_INFO_ON_ROOT(++step << ". Using RoughWall setup")
       initHemisphers(blocks, ids.flagField, setup, config->getOneBlock("RoughWall"));
    }
 
    if(config->getNumBlocks("TPMS") > 0){
+      WALBERLA_LOG_INFO_ON_ROOT(++step << ". Placing TPMS material")
       auto tpms = std::make_shared<TPMS>(config->getOneBlock("TPMS"), setup);
       tpms->setupBoundary(blocks, ids.flagField);
    }
diff --git a/apps/showcases/PorousMediaGPU/PorousMediaGPU.py b/apps/showcases/PorousMediaGPU/PorousMediaGPU.py
index 17298b5aac7b20e3b719a1b3ab318c6cfab87b58..b617aabc3e060483721d720926c503014e2080ae 100644
--- a/apps/showcases/PorousMediaGPU/PorousMediaGPU.py
+++ b/apps/showcases/PorousMediaGPU/PorousMediaGPU.py
@@ -77,7 +77,7 @@ with CodeGeneration() as ctx:
                                  boundary_object=UBB(velocity=velocity_field.center_vector, density=1.0, data_type=dtype, dim=dim),
                                  field_data_type=pdf_dtype)
 
-    outflow_boundary = ExtrapolationOutflow(stencil[4], lb_method, data_type=pdf_dtype)
+    outflow_boundary = ExtrapolationOutflow(stencil[4], lb_method, data_type=pdf_dtype, density=1.0)
     outflow = lbm_boundary_generator(class_name='Outflow', flag_uid='Outflow',
                                      boundary_object=outflow_boundary,
                                      field_data_type=pdf_dtype)
diff --git a/apps/showcases/PorousMediaGPU/Probe.cpp b/apps/showcases/PorousMediaGPU/Probe.cpp
index f26b120a34e61543dbffd1f61b22d0512ff1a08b..5e85bb4dea31c2b90544e77b604a193c3346c485 100644
--- a/apps/showcases/PorousMediaGPU/Probe.cpp
+++ b/apps/showcases/PorousMediaGPU/Probe.cpp
@@ -19,13 +19,13 @@
 //======================================================================================================================
 #include "Probe.h"
 
+
 namespace walberla{
 
 void Probe::initialise(){
 
     if(probePoints_.empty()){return;}
 
-    std::map<IBlock*, std::vector<Vector3<real_t>> > pointsPerBlock;
     const AABB domain = blocks_->getDomain();
  
     for(auto p: probePoints_){
@@ -36,7 +36,7 @@ void Probe::initialise(){
             Vector3<real_t> localCellCenter;
             blocks_->getCellCenter(localCellCenter, localCell);
 
-            pointsPerBlock[block].emplace_back(localCellCenter);
+            pointsPerBlock_[block].emplace_back(localCellCenter);
         }
     }
 
@@ -45,11 +45,11 @@ void Probe::initialise(){
         auto & cpuCellVector = dataVector->cpuCellVector();
         auto & cpuDataVector = dataVector->cpuDataVector();
 
-        const uint_t n = uint_c(pointsPerBlock[&block].size());
+        const uint_t n = uint_c(pointsPerBlock_[&block].size());
         cpuCellVector.resize(3 * n);
 
         uint_t i = 0;
-        for(auto p: pointsPerBlock[&block]){
+        for(auto p: pointsPerBlock_[&block]){
             const Cell localCell = blocks_->getBlockLocalCell(block, p);
 
             cpuCellVector[i] = localCell[0];
@@ -58,7 +58,7 @@ void Probe::initialise(){
             ++i;
         }
 
-        cpuDataVector.resize(4 * n * writeCounter_);
+        cpuDataVector.resize(dataSetNames.size() * n * writeCounter_);
         dataVector->syncGPU();
 
         const Vector3<uint_t> s(n, uint_c(1), uint_c(1));
@@ -71,21 +71,31 @@ void Probe::initialise(){
         gpuGrid_[&block] = numberBlocks;
     }
 
+    extendSize[0] = 0;
+    selectStart[0] = 0;
+
     writeHeaderFiles();
 }
 
 void Probe::writeHeaderFiles(){
 
-    filename_ += std::to_string(mpi::MPIManager::instance()->rank()) + ".txt";
+    const hsize_t dsetSize[1]{ 1 };
+    const hsize_t maxSize[1]{ H5S_UNLIMITED };
+    const hsize_t chunkSize[1]{writeCounter_};
 
-    filesystem::path dataFile( filename_.c_str() );
-    if( filesystem::exists( dataFile ) )
-        std::remove( dataFile.string().c_str() );
+    H5::DataSpace arrayDataSpace(1, dsetSize, maxSize);
+    H5::DSetCreatPropList props;
+    props.setChunk(1, chunkSize);
 
-    std::ofstream outfile( filename_.c_str() );
-    outfile << std::setprecision(2);
-    uint_t counter = 0;
+    filesystem::path hdfFilePath( filename_.c_str() );
+    if( filesystem::exists( hdfFilePath ) )
+        std::remove( hdfFilePath.string().c_str() );
 
+    h5file_ = io::createHdf5file(hdfFilePath);
+    H5::Group timeGroup = h5file_.createGroup("time");
+    timeGroup.createDataSet("t", io::h5Float(), arrayDataSpace, props);
+
+    uint_t counter = 0;
     for (auto blockIterator = blocks_->begin(); blockIterator != blocks_->end(); ++blockIterator){
         auto block = blockIterator.get();
         auto* dataVector = block->getData< DataVector > ( probeDataID );
@@ -95,61 +105,83 @@ void Probe::writeHeaderFiles(){
             const Cell localCell(cell_idx_c(cpuCellVector[i]), cell_idx_c(cpuCellVector[i + n]), cell_idx_c(cpuCellVector[i + 2*n]));
             Vector3< real_t > p;
             blocks_->getBlockLocalCellCenter(*block, localCell, p);
-            outfile << "p" << counter++ << "(" << p[0] << ", " << p[1] << ", " << p[2] << ")";
-            if( !( std::next(blockIterator) == blocks_->end() &&  i == n - 1) ){outfile << ", ";}
-        }
-    }
 
-    outfile << "\n";
-    outfile << "t,";
+            std::string probeName = "Probe" + std::to_string(counter);
+            if (h5file_.exists(probeName)) { continue; }
+            H5::Group probeGroup = h5file_.createGroup(probeName);
 
-    counter = 0;
-    for (auto blockIterator = blocks_->begin(); blockIterator != blocks_->end(); ++blockIterator){
-        auto block = blockIterator.get();
-        auto* dataVector = block->getData< DataVector > ( probeDataID );
-        auto & cpuCellVector = dataVector->cpuCellVector();
-        const uint_t n = uint_c(dataVector->size());
-        for(uint_t i = 0; i < n; ++i){
-            outfile << "rho" << counter << "," << "ux" << counter << "," << "uy" << counter << "," << "uz" << counter;
+            hsize_t locationShape[1]{ 3 };
+            H5::DataSpace spacingAttrDspace(1, locationShape, locationShape);
+            H5::Attribute locationAttr = probeGroup.createAttribute("Location", io::h5FileFloat(), spacingAttrDspace);
+
+            float location[3]{float(p[0]), float(p[1]), float(p[2])};
+            locationAttr.write(io::h5Float(), location);
+
+            for(auto name: dataSetNames){
+                probeGroup.createDataSet(name, io::h5Float(), arrayDataSpace, props);
+            }
             counter++;
-            if( !( std::next(blockIterator) == blocks_->end() &&  i == n - 1) ){outfile << ",";}
         }
     }
-    outfile << "\n";
-    outfile.close();
-
 }
 
 void Probe::writeProbes(){
-    std::ofstream outfile( filename_.c_str(), std::ios_base::app );
-    outfile << std::setprecision(8);
 
+    extendSize[0] += executionCounter_;
+    
+    std::vector<float> tmp(executionCounter_);
     const real_t t = (real_c(totalExecutionCounter_ + frequency_ - uint_c(1)) * setup_.dt) - ( real_c(executionCounter_ * frequency_) * setup_.dt);
     for(uint_t i = 0; i < executionCounter_; ++i){
-        outfile << t + real_c(i)*setup_.dt << ",";
-        for (auto blockIterator = blocks_->begin(); blockIterator != blocks_->end(); ++blockIterator){
-            auto block = blockIterator.get();
-            auto* dataVector = block->getData< DataVector > ( probeDataID );
-            if(i == 0){
-                dataVector->syncCPU();
-                dataVector->clear();
-            }
-            auto & cpuDataVector = dataVector->cpuDataVector();
-            const uint_t n = uint_c(dataVector->size());
+        tmp[i] = float_c(t) + float_c(i)*float_c(setup_.dt);
+    }
 
-            for(uint_t j = 0; j < n; ++j){
-                for (uint_t k = 0; k < 4; ++k){
-                    outfile << cpuDataVector[i*4*n + n*k + j];
-                    if( !(j == n - 1 && k == 3) ){outfile << ",";}
+    writeData(tmp, "time", "t");
+
+    for (auto blockIterator = blocks_->begin(); blockIterator != blocks_->end(); ++blockIterator){
+        auto block = blockIterator.get();
+        auto* dataVector = block->getData< DataVector > ( probeDataID );
+        dataVector->syncCPU();
+        dataVector->clear();
+    }
+
+    const uint_t nDatasets = dataSetNames.size();
+
+    for (auto blockIterator = blocks_->begin(); blockIterator != blocks_->end(); ++blockIterator){
+        auto block = blockIterator.get();
+        auto* dataVector = block->getData< DataVector > ( probeDataID );
+        auto & cpuDataVector = dataVector->cpuDataVector();
+        const uint_t n = uint_c(dataVector->size());
+
+        for(uint_t i = 0; i < n; ++i){
+            const std::string probeName = "Probe" + std::to_string(i);
+            for(uint_t j = 0; j < nDatasets; ++j){
+                auto dataSetName = dataSetNames[j];
+                for(uint_t k = 0; k < executionCounter_; ++k){
+                    tmp[k] = float_c(cpuDataVector[k*nDatasets*n + j*n + i]);
                 }
+                writeData(tmp, probeName, dataSetName);
             }
-            outfile << "\n";
         }
     }
-    outfile.close();
+
+    selectStart[0] += executionCounter_;
     executionCounter_ = 0;
 }
 
+void Probe::writeData(const std::vector<float>& data, const std::string& groupName, const std::string& dataSetName){
+    const hsize_t selectCount[1] { executionCounter_ };
+
+    auto group = h5file_.openGroup(groupName);
+    H5::DataSet dataset = group.openDataSet(dataSetName);
+
+    dataset.extend(extendSize);
+    H5::DataSpace dataspace = dataset.getSpace();
+    dataspace.selectHyperslab(H5S_SELECT_SET, selectCount, selectStart);
+        
+    H5::DataSpace memSpace(1, selectCount);
+    dataset.write(data.data(), io::h5Float(), memSpace, dataspace);
+}
+
 void Probe::update(){
     ++totalExecutionCounter_;
     if (probePoints_.empty() || (totalExecutionCounter_ - uint_c(1)) % frequency_ != 0) return;
diff --git a/apps/showcases/PorousMediaGPU/Probe.h b/apps/showcases/PorousMediaGPU/Probe.h
index dabb2798c3846609fbcc85126b1cd234dc99c0fb..25a7dba9c1e34e50c67e84b3edbc91a1cc1e6ec7 100644
--- a/apps/showcases/PorousMediaGPU/Probe.h
+++ b/apps/showcases/PorousMediaGPU/Probe.h
@@ -29,9 +29,13 @@
 #include "gpu/ErrorChecking.h"
 #include "gpu/FieldCopy.h"
 
+#include "io/hdfWrapper.h"
+
 #include "Setup.h"
 #include "utilGPU.h"
 
+// #include <H5Cpp.h>
+
 namespace walberla{
 
 class Probe
@@ -131,13 +135,20 @@ class Probe
   uint_t executionCounter_;
   uint_t totalExecutionCounter_;
 
+  H5::H5File h5file_;
+  hsize_t extendSize[1];
+  hsize_t selectStart[1];
+  const std::vector<std::string> dataSetNames{"rho", "ux", "uy", "uz"};
+
   std::vector<Vector3<real_t>> probePoints_;
+  std::map<IBlock*, std::vector<Vector3<real_t>> > pointsPerBlock_;
 
   std::map<IBlock*, Vector3<uint32_t>> gpuBlock_;
   std::map<IBlock*, Vector3<uint32_t>> gpuGrid_;
 
   void writeHeaderFiles();
   void writeProbes();
+  void writeData(const std::vector<float>& data, const std::string& groupName, const std::string& dataSetName);
 
 }; // class Probe
 } // namespace walberla
diff --git a/apps/showcases/PorousMediaGPU/Setup.h b/apps/showcases/PorousMediaGPU/Setup.h
index 10a1029415458161af06e885f5864858f93e204d..2a4ce7dd6de8a8124da70a10043ddf3f163dadec 100644
--- a/apps/showcases/PorousMediaGPU/Setup.h
+++ b/apps/showcases/PorousMediaGPU/Setup.h
@@ -29,6 +29,11 @@
 
 namespace walberla
 {
+
+inline real_t calculateDrivingForce(const real_t frictionVelocity, const real_t channelHeight){
+   return frictionVelocity * frictionVelocity / channelHeight;
+}
+
 struct Setup
 {
 
@@ -76,6 +81,11 @@ struct Setup
       dxF = real_c(coarseMeshSize) / real_c( 1 << refinementLevels );
       dt  = latticeVelocity / referenceVelocity * coarseMeshSize;
 
+      drivingForce = real_c(0.0);
+      if(config->getNumBlocks("RoughWall") > 0){
+         drivingForce = calculateDrivingForce(latticeVelocity, latticeReferenceLength);
+      }
+
       conversionFactor = dxC / dt;
 
       kinViscosity = viscosity * coarseMeshSize * coarseMeshSize / dt;
@@ -134,8 +144,8 @@ struct Setup
    real_t dxF;
    real_t dt;
 
+   real_t drivingForce;
    real_t conversionFactor;
-
    uint_t timesteps;
 
    uint_t valuesPerCell;
diff --git a/apps/showcases/PorousMediaGPU/TPMS.prm b/apps/showcases/PorousMediaGPU/TPMS.prm
index c3933e029060bb55056bb7d6a99b163c8c30c24d..98ff6f5b08e4ac98979e7ac346a177528cccfc3e 100644
--- a/apps/showcases/PorousMediaGPU/TPMS.prm
+++ b/apps/showcases/PorousMediaGPU/TPMS.prm
@@ -7,7 +7,7 @@ Parameters
     latticeVelocity 0.05;
     initialiseWithInletVelocity true;
 
-    timesteps 1;
+    timesteps 10000;
 
     processMemoryLimit 512; // MiB
     innerOuterSplit <1, 1, 1>;
@@ -24,7 +24,7 @@ Turbulence
     numberOfFourrierModes 128;
     U_c < 2.0, 0.0, 0.0 >;
 }
-
+/*
 TPMS
 {
     waveNumber 6.2831853072;
@@ -34,7 +34,7 @@ TPMS
     end 5.5;
     type gyroid;
 }
-
+*/
 //! [domainSetup]
 DomainSetup
 {
@@ -66,10 +66,10 @@ StabilityChecker
 
 VTKWriter
 {
-    vtkWriteFrequency 1;
+    vtkWriteFrequency 0;
     vtkGhostLayers 0;
     velocity true;
-    writeVelocityAsMagnitude true;
+    writeVelocityAsMagnitude false;
     density false;
     flag false;
     writeOnlySlice false;
@@ -93,6 +93,6 @@ Logging
 Probes
 {
     evaluationFrequency 1;
-    writeCounter 1000; // after how many evaluations the results are written to file
-    filename probes;
+    writeCounter 5000; // after how many evaluations the results are written to file
+    filename probeData.h5;
 }
\ No newline at end of file
diff --git a/apps/showcases/PorousMediaGPU/roughWall.prm b/apps/showcases/PorousMediaGPU/roughWall.prm
index d09a06a39e1f4c012827bb61cb64ff982e63f8d9..e85a7a2190f14fef7dcccd8ec36e1ecdcf45013e 100644
--- a/apps/showcases/PorousMediaGPU/roughWall.prm
+++ b/apps/showcases/PorousMediaGPU/roughWall.prm
@@ -1,13 +1,13 @@
 Parameters
 {
-    reynoldsNumber 2500;
+    reynoldsNumber 400;
 
     referenceVelocity 1.0;
     referenceLength 1.0;
     latticeVelocity 0.05;
     initialiseWithInletVelocity true;
 
-    timesteps 1;
+    timesteps 10001;
 
     processMemoryLimit 512; // MiB
     innerOuterSplit <1, 1, 1>;
@@ -34,7 +34,7 @@ RoughWall
 DomainSetup
 {
     domainSize    < 60, 40, 12 >;
-    cellsPerBlock < 400, 80, 80 >;
+    cellsPerBlock < 600, 400, 120 >;
     // cellsPerBlock < 768, 96, 96 >;
     // cellsPerBlock < 1536, 192, 192 >;
     blocks    < 1, 1, 1 >;
@@ -61,13 +61,13 @@ StabilityChecker
 
 VTKWriter
 {
-    vtkWriteFrequency 1;
+    vtkWriteFrequency 5000;
     vtkGhostLayers 0;
     velocity true;
     writeVelocityAsMagnitude true;
     density false;
     flag false;
-    writeOnlySlice false;
+    writeOnlySlice true;
     sliceThickness 1;
     writeXZSlice false;
     amrFileFormat false;
diff --git a/apps/showcases/PorousMediaGPU/utilGPU.cu b/apps/showcases/PorousMediaGPU/utilGPU.cu
index 8efd2fe45e686d91f18f0e676570e1f885887442..75abc7a4d014eb9b2aabf07b482c61dbe8073e1e 100644
--- a/apps/showcases/PorousMediaGPU/utilGPU.cu
+++ b/apps/showcases/PorousMediaGPU/utilGPU.cu
@@ -142,7 +142,7 @@ static __device__ void singleMode(double* rX, double* rY, double* rZ, const doub
 static __global__ void summOfFourrierModesKernel(double * velocity, double * const kappa, double * const ut,
                                                  const real_t yMin, const real_t zMin, const real_t minCornerY, const real_t minCornerZ,
                                                  const real_t dx, const real_t ucX, const real_t ucY, const real_t ucZ,
-                                                 const real_t t, const uint_t n, const real_t latticeVelocity,
+                                                 const real_t t, const uint_t n, const real_t latticeVelocity, const int64_t nGhostLayer,
                                                  const int64_t ySize, const int64_t zSize,
                                                  const int64_t yStride, const int64_t zStride, const int64_t fStride)
 {
@@ -162,8 +162,8 @@ static __global__ void summOfFourrierModesKernel(double * velocity, double * con
       const int64_t linear_index = x + (y * n) + (z * n * ySize);
      
       const real_t xCenter = real_t(0.0);
-      const real_t yCenter = (yMin + ( real_t( y ) + real_t(0.5) ) * dx) + minCornerY;
-      const real_t zCenter = (zMin + ( real_t( z ) + real_t(0.5) ) * dx) + minCornerZ;
+      const real_t yCenter = (yMin + ( real_t( y - nGhostLayer ) + real_t(0.5) ) * dx) + minCornerY;
+      const real_t zCenter = (zMin + ( real_t( z - nGhostLayer ) + real_t(0.5) ) * dx) + minCornerZ;
 
       const real_t pX = xCenter - (t * ucX);
       const real_t pY = yCenter - (t * ucY);
@@ -268,7 +268,7 @@ void getBlockAndGridSize(Vector3<uint_t> s, Vector3<uint32_t>& block, Vector3<ui
 }
 
 void updateVelocityPlane(gpu::GPUField<real_t>* velocityGPU, real_t* kappa, real_t* ut, const Vector3<real_t>& globalMinCorner, const Vector3<real_t>& blockMinCorner, const Vector3<real_t>& uc,
-                         const real_t dx, const real_t t, const uint_t n, const real_t latticeVelocity,
+                         const real_t dx, const real_t t, const uint_t n, const real_t latticeVelocity, const int64_t nGhostLayers,
                          const Vector3<uint32_t>& block, const Vector3<uint32_t>& grid){
 
    dim3 block_(block[0], block[1], block[2]);
@@ -277,15 +277,15 @@ void updateVelocityPlane(gpu::GPUField<real_t>* velocityGPU, real_t* kappa, real
    // In shared memory we store the result of the u_prime (x, y, z)
    const uint_t sharedMemorySize = 3 * n * sizeof(real_t);
  
-   double * data_velocity = velocityGPU->dataAt(0, 0, 0, 0);
-   const int64_t size_velocity_1 = int64_c(velocityGPU->ySize());
-   const int64_t size_velocity_2 = int64_c(velocityGPU->zSize());
+   double * data_velocity = velocityGPU->dataAt(-nGhostLayers, -nGhostLayers, -nGhostLayers, 0);
+   const int64_t size_velocity_1 = int64_c(velocityGPU->ySize()) + 2 * nGhostLayers;
+   const int64_t size_velocity_2 = int64_c(velocityGPU->zSize()) + 2 * nGhostLayers;
    const int64_t stride_velocity_1 = int64_c(velocityGPU->yStride());
    const int64_t stride_velocity_2 = int64_c(velocityGPU->zStride());
    const int64_t stride_velocity_3 = int64_c(velocityGPU->fStride());
    internal::summOfFourrierModesKernel<<<grid_, block_, sharedMemorySize, nullptr>>>(data_velocity, kappa, ut,
                                                                       globalMinCorner[1], globalMinCorner[2], blockMinCorner[1], blockMinCorner[2],
-                                                                      dx, uc[0], uc[1], uc[2], t, n, latticeVelocity,
+                                                                      dx, uc[0], uc[1], uc[2], t, n, latticeVelocity, nGhostLayers,
                                                                       size_velocity_1, size_velocity_2,
                                                                       stride_velocity_1, stride_velocity_2, stride_velocity_3);
 
diff --git a/apps/showcases/PorousMediaGPU/utilGPU.h b/apps/showcases/PorousMediaGPU/utilGPU.h
index d7c41492246b4b8bca4b27b22acc22583334a819..cdaf6ff40f4404472522bd9beb41d8eac8bb3c00 100644
--- a/apps/showcases/PorousMediaGPU/utilGPU.h
+++ b/apps/showcases/PorousMediaGPU/utilGPU.h
@@ -31,7 +31,7 @@ namespace walberla{
 void getBlockAndGridSize(Vector3<uint_t> s, Vector3<uint32_t>& block, Vector3<uint32_t>& grid);
 
 void updateVelocityPlane(gpu::GPUField<real_t>* velocityGPU, real_t* kappa, real_t* ut, const Vector3<real_t>& globalMinCorner, const Vector3<real_t>& blockMinCorner, const Vector3<real_t>& uc,
-                         const real_t dx, const real_t t, const uint_t n, const real_t latticeVelocity,
+                         const real_t dx, const real_t t, const uint_t n, const real_t latticeVelocity, const int64_t nGhostLayers,
                          const Vector3<uint32_t>& block, const Vector3<uint32_t>& grid);
 
 void updateProbe(gpu::GPUField<real_t>* pdfs, int64_t* cellData, real_t* probeData, const int64_t n, const real_t velocityScalingFactor,
diff --git a/python/lbmpy_walberla/additional_data_handler.py b/python/lbmpy_walberla/additional_data_handler.py
index daaab32fd40aff5f2150495bf4782ef61a282dae..b6d8a98f4457691a34f4f63b36045711adc6b606 100644
--- a/python/lbmpy_walberla/additional_data_handler.py
+++ b/python/lbmpy_walberla/additional_data_handler.py
@@ -258,6 +258,8 @@ class OutflowAdditionalDataHandler(AdditionalDataHandler):
             pdfs_data_type = BasicType(pdfs_data_type)
             self._pdfs_data_type = pdfs_data_type.c_name
 
+        self._density_correction = True if boundary_object.density is not None else False
+
         self._streaming_pattern = boundary_object.streaming_pattern
         if zeroth_timestep:
             self._zeroth_timestep = zeroth_timestep
@@ -296,6 +298,9 @@ class OutflowAdditionalDataHandler(AdditionalDataHandler):
         for key, value in self.get_init_dict(pdf_acc, direction_index).items():
             init_list.append(f"element.{key} = {self._dtype}( {self._field_name}->get({value}) );")
 
+        if self._density_correction:
+            init_list.append(f"element.rho = {self._dtype}( 1.0 );")
+
         return "\n".join(init_list)
 
     @property
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 13e7ac49ebbf92c6718f00e275af7077e7e17539..68dbcde98e7efb86a4dd38a7a444789fded650fb 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -34,6 +34,9 @@ add_subdirectory( field )
 add_subdirectory( gather )
 add_subdirectory( geometry )
 add_subdirectory( gui )
+if(WALBERLA_BUILD_WITH_HDF5)
+   add_subdirectory( io )
+endif()
 add_subdirectory( lbm )
 add_subdirectory( lbm_generated )
 add_subdirectory( lbm_mesapd_coupling )
diff --git a/src/io/CMakeLists.txt b/src/io/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d713ba37c16143341ec7cdc328a27d04d084cbb6
--- /dev/null
+++ b/src/io/CMakeLists.txt
@@ -0,0 +1,8 @@
+add_library( io )
+target_include_directories(io PUBLIC ${HDF5_CXX_INCLUDE_DIRS})
+target_link_libraries( io PUBLIC core ${HDF5_LIBRARIES} )
+target_sources( io
+      PRIVATE
+      hdfWrapper.h
+      hdfWrapper.cpp
+) 
\ No newline at end of file
diff --git a/src/io/hdfWrapper.cpp b/src/io/hdfWrapper.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..54617a665c9d588a609d77a3c3eee563ecfc23cf
--- /dev/null
+++ b/src/io/hdfWrapper.cpp
@@ -0,0 +1,44 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can 
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of 
+//  the License, or (at your option) any later version.
+//  
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT 
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//  for more details.
+//  
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file hdfWrapper.h
+//! \ingroup postprocessing
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+#include "hdfWrapper.h"
+
+namespace walberla::io{
+
+hdfFile createHdf5file(const filesystem::path& filepath){
+    
+  H5::FileAccPropList fileAccProps;
+
+#ifdef WALBERLA_BUILD_WITH_MPI
+    //  Enable MPI-IO
+    H5Pset_fapl_mpio(fileAccProps.getId(), mpi::MPIManager::instance()->comm(), MPI_INFO_NULL);
+#endif
+
+  //  All MPI processes collectively create and prepare the file
+  return H5::H5File(filepath.c_str(), H5F_ACC_CREAT | H5F_ACC_RDWR, H5::FileCreatPropList::DEFAULT, fileAccProps);
+
+}
+
+H5::FloatType h5Double(){return H5::FloatType(H5::PredType::NATIVE_DOUBLE);}
+H5::FloatType h5Float(){return H5::FloatType(H5::PredType::NATIVE_FLOAT);}
+H5::FloatType h5FileFloat(){return H5::FloatType(H5::PredType::IEEE_F32LE);}
+
+
+}
\ No newline at end of file
diff --git a/src/io/hdfWrapper.h b/src/io/hdfWrapper.h
new file mode 100644
index 0000000000000000000000000000000000000000..bd090771383ea156dbb2c8612bd7affd8b9d4175
--- /dev/null
+++ b/src/io/hdfWrapper.h
@@ -0,0 +1,41 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can 
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of 
+//  the License, or (at your option) any later version.
+//  
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT 
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//  for more details.
+//  
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file hdfWrapper.h
+//! \ingroup postprocessing
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+#pragma once
+
+#include "core/mpi/MPIManager.h"
+#include "core/mpi/MPIWrapper.h"
+#include "core/Filesystem.h"
+
+#include <H5Cpp.h>
+
+
+namespace walberla::io{
+
+using hdfFile = H5::H5File;
+hdfFile createHdf5file(const filesystem::path& filepath);
+
+H5::FloatType h5Double();
+H5::FloatType h5Float();
+H5::FloatType h5FileFloat();
+
+
+
+}
\ No newline at end of file