diff --git a/apps/showcases/PorousMediaGPU/CMakeLists.txt b/apps/showcases/PorousMediaGPU/CMakeLists.txt
index fdea6fdfa2482c26261aa781b1b0ac6bb79eb7b9..a5cf4003da1439199eb85abfa17b0521426c702c 100644
--- a/apps/showcases/PorousMediaGPU/CMakeLists.txt
+++ b/apps/showcases/PorousMediaGPU/CMakeLists.txt
@@ -18,11 +18,11 @@ waLBerla_generate_target_from_python(NAME PorousMediaGPUCodeGen
 
 if (WALBERLA_BUILD_WITH_CUDA OR WALBERLA_BUILD_WITH_HIP)
     waLBerla_add_executable ( NAME PorousMediaGPU
-            FILES PorousMediaGPU.cpp Types.h InitializerFunctions.cpp
-            DEPENDS blockforest boundary core field gpu lbm_generated stencil timeloop vtk PorousMediaGPUCodeGen )
+            FILES PorousMediaGPU.cpp GridGeneration.h Setup.h Types.h InitializerFunctions.cpp
+            DEPENDS blockforest boundary core field gpu lbm_generated postprocessing stencil timeloop vtk PorousMediaGPUCodeGen )
 else()
     waLBerla_add_executable ( NAME PorousMediaGPU
-            FILES PorousMediaGPU.cpp Types.h InitializerFunctions.cpp
-            DEPENDS blockforest boundary core field lbm_generated stencil timeloop vtk PorousMediaGPUCodeGen )
+            FILES PorousMediaGPU.cpp GridGeneration.h Setup.h Types.h InitializerFunctions.cpp
+            DEPENDS blockforest boundary core field lbm_generated postprocessing stencil timeloop vtk PorousMediaGPUCodeGen )
 
 endif(WALBERLA_BUILD_WITH_CUDA OR WALBERLA_BUILD_WITH_HIP)
diff --git a/apps/showcases/PorousMediaGPU/GridGeneration.h b/apps/showcases/PorousMediaGPU/GridGeneration.h
new file mode 100644
index 0000000000000000000000000000000000000000..01ebc88cf07f0a7e287bf8f22c6680ba3ac639a5
--- /dev/null
+++ b/apps/showcases/PorousMediaGPU/GridGeneration.h
@@ -0,0 +1,144 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file GridGeneration.h
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+#pragma once
+
+#include "blockforest/Initialization.h"
+#include "blockforest/SetupBlock.h"
+#include "blockforest/SetupBlockForest.h"
+#include "blockforest/loadbalancing/StaticCurve.h"
+
+#include "core/Environment.h"
+#include "core/logging/Initialization.h"
+#include "core/timing/RemainingTimeLogger.h"
+#include "core/timing/TimingPool.h"
+
+#include <string>
+#include <fstream>
+
+#include "Types.h"
+#include "Setup.h"
+
+using namespace walberla;
+
+uint_t blockCells(const Setup& setup, const bool withGhostLayers){
+   uint_t cells;
+   if(!withGhostLayers){
+      cells = setup.cellsPerBlock[0] * setup.cellsPerBlock[1] * setup.cellsPerBlock[2];
+   }
+   else{
+      cells = (setup.cellsPerBlock[0] + 2 * setup.numGhostLayers) *
+              (setup.cellsPerBlock[1] + 2 * setup.numGhostLayers) *
+              (setup.cellsPerBlock[2] + 2 * setup.numGhostLayers);
+   }
+   return cells;
+}
+
+
+void createSetupBlockForest(SetupBlockForest& setupBfs, const Setup& setup, const bool useMPIManager=false)
+{
+   WALBERLA_LOG_INFO_ON_ROOT("Generating SetupBlockForest...")
+
+   uint_t numProcesses = setup.numProcesses;
+   const std::string blockForestFilestem = setup.blockForestFilestem;
+
+   if(useMPIManager) {numProcesses = uint_c(mpi::MPIManager::instance()->numProcesses());}
+
+   // Sphere Sphere( setup );
+   // SphereRefinementSelection SphereRefinementSelection( Sphere, setup.refinementLevels );
+   // SphereBlockExclusion SphereBlockExclusion( Sphere );
+
+   // setupBfs.addRefinementSelectionFunction(std::function<void(SetupBlockForest &)>(SphereRefinementSelection));
+   // setupBfs.addBlockExclusionFunction(SphereBlockExclusion);
+
+   const AABB domain(real_t(0.0), real_t(0.0), real_t(0.0), setup.domainSize[0], setup.domainSize[1], setup.domainSize[2]);
+   setupBfs.addWorkloadMemorySUIDAssignmentFunction(blockforest::uniformWorkloadAndMemoryAssignment);
+   setupBfs.init(domain, setup.rootBlocks[0], setup.rootBlocks[1], setup.rootBlocks[2],
+                         setup.periodic[0], setup.periodic[1], setup.periodic[2]);
+   setupBfs.balanceLoad(blockforest::StaticLevelwiseCurveBalanceWeighted(), numProcesses);
+
+   WALBERLA_LOG_INFO_ON_ROOT("===========================  BLOCK FOREST STATISTICS ============================");
+   WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << setupBfs.getNumberOfBlocks())
+   for (uint_t level = 0; level <= setup.refinementLevels; level++){
+      const uint_t numberOfBlocks = setupBfs.getNumberOfBlocks(level);
+      WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << numberOfBlocks)
+   }
+
+   const real_t avgBlocksPerProc = real_c(setupBfs.getNumberOfBlocks()) / real_c(setupBfs.getNumberOfProcesses());
+   WALBERLA_LOG_INFO_ON_ROOT("Average blocks per process: " << avgBlocksPerProc);
+
+   const uint_t cellsPerBlock        = blockCells(setup, false);
+   const uint_t totalNumberCells     = setupBfs.getNumberOfBlocks() * cellsPerBlock;
+   const real_t averageCellsPerGPU   = avgBlocksPerProc * real_c(cellsPerBlock);
+   const uint_t cellsPerBlockGL      = blockCells(setup, true);
+   const uint_t totalNumberCellsGL   = setupBfs.getNumberOfBlocks() * cellsPerBlockGL;
+   const real_t averageCellsPerGPUGL = avgBlocksPerProc * real_c(cellsPerBlockGL);
+
+   const uint_t PDFsPerCell            = StorageSpecification_T::inplace ? Stencil_T::Q : 2 * Stencil_T::Q;
+   const uint_t valuesPerCell          = (PDFsPerCell + VelocityField_T::F_SIZE + ScalarField_T::F_SIZE);
+   const uint_t sizePerValue           = sizeof(StorageSpecification_T::value_type);
+   const double expectedMemory         = double_c(totalNumberCells * valuesPerCell * sizePerValue) * 1e-9;
+   const double expectedMemoryPerGPU   = double_c(averageCellsPerGPU * valuesPerCell * sizePerValue) * 1e-9;
+   const double expectedMemoryGL       = double_c(totalNumberCellsGL * valuesPerCell * sizePerValue) * 1e-9;
+   const double expectedMemoryPerGPUGL = double_c(averageCellsPerGPUGL * valuesPerCell * sizePerValue) * 1e-9;
+
+   WALBERLA_LOG_INFO_ON_ROOT( "Total number of cells will be " << totalNumberCells << " fluid cells (in total on all levels)")
+   WALBERLA_LOG_INFO_ON_ROOT( "Expected total memory demand will be " << expectedMemory << " GB")
+   WALBERLA_LOG_INFO_ON_ROOT( "Average memory demand per GPU will be " << expectedMemoryPerGPU << " GB")
+   WALBERLA_LOG_INFO_ON_ROOT( "Expected total memory demand (with GL) will be " << expectedMemoryGL << " GB")
+   WALBERLA_LOG_INFO_ON_ROOT( "Average memory demand per GPU (with GL) will be " << expectedMemoryPerGPUGL << " GB")
+
+   WALBERLA_LOG_INFO_ON_ROOT("=================================================================================")
+
+   if(mpi::MPIManager::instance()->numProcesses() > 1)
+      return;
+
+   if(setup.writeSetupForestAndReturn){
+      std::ostringstream oss;
+      oss << blockForestFilestem << ".bfs";
+      setupBfs.saveToFile(oss.str().c_str());
+
+      setupBfs.writeVTKOutput(blockForestFilestem);
+   }
+}
+
+void createBlockForest(shared_ptr< BlockForest >& bfs, const Setup& setup){
+   if (mpi::MPIManager::instance()->numProcesses() > 1){
+      // Load structured block forest from file
+      std::ostringstream oss;
+      oss << setup.blockForestFilestem << ".bfs";
+      const std::string setupBlockForestFilepath = oss.str();
+      std::ifstream infile(setupBlockForestFilepath.c_str());
+      if(!infile.good()){
+         WALBERLA_LOG_WARNING_ON_ROOT("Blockforest was not created beforehand and thus needs to be created on the fly. For large simulation runs this can be a severe problem!")
+         SetupBlockForest setupBfs;
+         createSetupBlockForest(setupBfs, setup, true);
+         bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs);
+      }
+      else{
+         bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()),
+                                               setupBlockForestFilepath.c_str(), false);
+      }
+   }
+   else{
+      SetupBlockForest setupBfs;
+      createSetupBlockForest(setupBfs, setup);
+      bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs);
+   }
+}
\ No newline at end of file
diff --git a/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp b/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp
index a3723d888af1498266b374f2fc79950534cc1620..d82111dc24e18f8ed53ff6218e5de108eb537255 100644
--- a/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp
+++ b/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp
@@ -46,6 +46,7 @@
 #include "field/vtk/FlagFieldCellFilter.h"
 
 #include "geometry/InitBoundaryHandling.h"
+#include "geometry/mesh/TriangleMeshIO.h"
 
 #if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT)
 #   include "gpu/AddGPUFieldToStorage.h"
@@ -68,6 +69,8 @@
 #   include "lbm_generated/gpu/UniformGeneratedGPUPdfPackInfo.h"
 #endif
 
+#include "postprocessing/FieldToSurfaceMesh.h"
+
 #include "timeloop/SweepTimeloop.h"
 #include "vtk/VTKOutput.h"
 
@@ -76,6 +79,8 @@
 #include <memory>
 
 #include "InitializerFunctions.h"
+#include "GridGeneration.h"
+#include "Setup.h"
 #include "Types.h"
 
 #include "PorousMediaGPUStaticDefines.h"
@@ -98,8 +103,7 @@ using blockforest::communication::UniformBufferedScheme;
 using lbm_generated::UniformGeneratedPdfPackInfo;
 #endif
 
-int main(int argc, char** argv)
-{
+int main(int argc, char** argv){
    Environment env( argc, argv );
    mpi::MPIManager::instance()->useWorldComm();
 #if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT)
@@ -112,54 +116,48 @@ int main(int argc, char** argv)
    ///                                        SETUP AND CONFIGURATION                                             ///
    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-   auto blocks = blockforest::createUniformBlockGridFromConfig(config);
-
-   auto domainParameters       = config->getOneBlock("DomainSetup");
-   const real_t coarseMeshSize = domainParameters.getParameter< real_t >("dx");
-
-   auto parameters          = config->getOneBlock("Parameters");
-   const uint_t timesteps   = parameters.getParameter< uint_t >("timesteps", uint_c(50));
-   const bool gpuEnabledMPI = parameters.getParameter< bool >("gpuEnabledMPI", false);
-
-   const real_t reynoldsNumber    = parameters.getParameter< real_t >("reynoldsNumber");
-   const real_t referenceVelocity = parameters.getParameter< real_t >("referenceVelocity");
-   const real_t referenceLength   = parameters.getParameter< real_t >("referenceLength");
-   const real_t latticeVelocity   = parameters.getParameter< real_t >("latticeVelocity");
-
-   const real_t waveNumber = parameters.getParameter< real_t >("waveNumber");
-   const real_t scaling    = parameters.getParameter< real_t >("scaling");
-
-   const real_t speedOfSound = real_c(real_c(1.0) / std::sqrt( real_c(3.0) ));
-   const real_t machNumber = latticeVelocity / speedOfSound;
-   // TODO define RE
-   const real_t viscosity = real_c((latticeVelocity * referenceLength) / reynoldsNumber);
-   const real_t omega     = real_c(real_c(1.0) / (real_c(3.0) * viscosity + real_c(0.5)));
-
-   const real_t dt = latticeVelocity / referenceVelocity * coarseMeshSize;
-
-   const StorageSpecification_T StorageSpec = StorageSpecification_T();
-
+   auto parameters        = config->getOneBlock("Parameters");
+   auto domainParameters  = config->getOneBlock("DomainSetup");
+   auto loggingParameters = config->getOneBlock("Logging");
+   auto boundariesConfig   = config->getBlock("Boundaries");
 
-   WALBERLA_LOG_INFO_ON_ROOT( "Benchmark run data:"
+   Setup setup(parameters, domainParameters, loggingParameters, infoMap);
+   WALBERLA_LOG_INFO_ON_ROOT( "Simulation run data:"
                               "\n- simulation parameters:"   << std::setprecision(16) <<
-                              "\n   + collision model:     " << infoMap["collisionOperator"] <<
-                              "\n   + stencil:             " << infoMap["stencil"] <<
-                              "\n   + streaming:           " << infoMap["streamingPattern"] <<
+                              "\n   + collision model:     " << setup.collisionModel <<
+                              "\n   + stencil:             " << setup.stencil <<
+                              "\n   + streaming:           " << setup.streamingPattern <<
                               "\n   + compressible:        " << ( StorageSpecification_T::compressible ? "yes" : "no" ) <<
-                              "\n   + resolution:          " << coarseMeshSize << " - on the coarsest grid" <<
+                              "\n   + resolution:          " << setup.dxC << " - on the coarsest grid" <<
                               "\n- simulation properties:  "
-                              "\n   + kin. viscosity:      " << viscosity * coarseMeshSize * coarseMeshSize / dt << " [m^2/s] (on the coarsest grid)" <<
-                              "\n   + viscosity LB:        " << viscosity  << " [dx*dx/dt] (on the coarsest grid)" <<
-                              "\n   + omega:               " << omega << " (on the coarsest grid)" <<
-                              "\n   + inflow velocity:     " << referenceVelocity << " [m/s]" <<
-                              "\n   + lattice velocity:    " << latticeVelocity << " [dx/dt]" <<
-                              "\n   + Reynolds number:     " << reynoldsNumber <<
-                              "\n   + Mach number:         " << machNumber <<
-                              "\n   + dx (coarsest grid):  " << coarseMeshSize << " [m]" <<
-                              "\n   + dt (coarsest grid):  " << dt << " [s]"
-                              "\n   + #time steps:         " << timesteps << " (on the coarsest grid, " << ( real_c(1.0) / dt ) << " for 1s of real time)" <<
-                              "\n   + simulation time:     " << ( real_c( timesteps ) * dt ) << " [s]" )
+                              "\n   + kin. viscosity:      " << setup.kinViscosity << " [m^2/s] (on the coarsest grid)" <<
+                              "\n   + viscosity LB:        " << setup.viscosity  << " [dx*dx/dt] (on the coarsest grid)" <<
+                              "\n   + omega:               " << setup.omega << " (on the coarsest grid)" <<
+                              "\n   + inflow velocity:     " << setup.referenceVelocity << " [m/s]" <<
+                              "\n   + lattice velocity:    " << setup.latticeVelocity << " [dx/dt]" <<
+                              "\n   + Reynolds number:     " << setup.reynoldsNumber <<
+                              "\n   + Mach number:         " << setup.machNumber <<
+                              "\n   + dx (coarsest grid):  " << setup.dxC << " [m]" <<
+                              "\n   + dt (coarsest grid):  " << setup.dt << " [s]" <<
+                              "\n   + conversion Faktor:   " << setup.conversionFactor <<
+                              "\n   + #time steps:         " << setup.timesteps << " (on the coarsest grid, " << ( real_c(1.0) / setup.dt ) << " for 1s of real time)" <<
+                              "\n   + simulation time:     " << ( real_c( setup.timesteps ) * setup.dt ) << " [s]" )
+
+   bool writeSetupForestAndReturn = loggingParameters.getParameter< bool >("writeSetupForestAndReturn", false);
+   if (uint_c(MPIManager::instance()->numProcesses()) > 1) writeSetupForestAndReturn = false;
+   shared_ptr< BlockForest > bfs;
+   createBlockForest(bfs, setup);
+
+   if (writeSetupForestAndReturn && mpi::MPIManager::instance()->numProcesses() == 1){
+      WALBERLA_LOG_INFO_ON_ROOT("BlockForest has been created and writen to file. Returning program")
+      logging::Logging::printFooterOnStream();
+      return EXIT_SUCCESS;
+   }
 
+   auto blocks = std::make_shared< StructuredBlockForest >(bfs, setup.cellsPerBlock[0], setup.cellsPerBlock[1], setup.cellsPerBlock[2]);
+   blocks->createCellBoundingBoxes();
+
+   const StorageSpecification_T StorageSpec = StorageSpecification_T();
    // Creating fields
    IDs ids;
    ids.pdfField  = lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, uint_c(1), field::fzyx);
@@ -174,10 +172,8 @@ int main(int argc, char** argv)
    ids.densityFieldGPU  = gpu::addGPUFieldToStorage< ScalarField_T >(blocks, ids.densityField, "velocity on GPU", true);
 
    const Cell innerOuterSplit = Cell(parameters.getParameter< Vector3<cell_idx_t> >("innerOuterSplit", Vector3<cell_idx_t>(1, 1, 1)));
-   SweepCollection_T sweepCollection(blocks, ids.pdfFieldGPU, ids.densityFieldGPU, ids.velocityFieldGPU, omega, innerOuterSplit);
-
-   for (auto& block : *blocks)
-   {
+   SweepCollection_T sweepCollection(blocks, ids.pdfFieldGPU, ids.densityFieldGPU, ids.velocityFieldGPU, setup.omega, innerOuterSplit);
+   for (auto& block : *blocks){
       sweepCollection.initialise(&block);
    }
 
@@ -186,25 +182,28 @@ int main(int argc, char** argv)
    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
    // Boundaries
-   const FlagUID fluidFlagUID("Fluid");
-   const FlagUID wallFlagUID("NoSlip");
-
-   auto boundariesConfig   = config->getBlock("Boundaries");
-   if (boundariesConfig)
-   {
+   if (boundariesConfig){
       WALBERLA_LOG_INFO_ON_ROOT("Setting boundary conditions")
       geometry::initBoundaryHandling< FlagField_T >(*blocks, ids.flagField, boundariesConfig);
-      init_TPMS(blocks, ids.flagField, wallFlagUID, waveNumber, scaling);
-      geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, ids.flagField, fluidFlagUID);
+      init_TPMS(blocks, ids.flagField, setup.obstacleUID, setup.waveNumber, setup.scaling);
+      geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, ids.flagField, setup.fluidUID);
    }
-   geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, ids.flagField, fluidFlagUID);
+   geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, ids.flagField, setup.fluidUID);
    // BoundaryCollection_T boundaryCollection(blocks, ids.flagField, ids.pdfFieldGPU, fluidFlagUID, latticeVelocity);
-   BoundaryCollection_T boundaryCollection(blocks, ids.flagField, ids.pdfFieldGPU, fluidFlagUID, latticeVelocity, ids.pdfField);
+   BoundaryCollection_T boundaryCollection(blocks, ids.flagField, ids.pdfFieldGPU, setup.fluidUID, setup.latticeVelocity, ids.pdfField);
 
+   if( loggingParameters.getParameter< bool >("writeSurfaceMeshOfTPMS", false) ){
+      WALBERLA_LOG_INFO_ON_ROOT("Writing TPMS as surface file")
+      auto mesh = postprocessing::flagFieldToSurfaceMesh< FlagField_T >(blocks, ids.flagField, 4, true);
+      WALBERLA_EXCLUSIVE_WORLD_SECTION(0){
+         geometry::writeMesh("TPMS.obj", *mesh);
+      }
+   }
    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    ///                                           COMMUNICATION SCHEME                                             ///
    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
+   const bool gpuEnabledMPI = parameters.getParameter< bool >("gpuEnabledMPI", false);
    UniformGPUScheme< Stencil_T > communication(blocks, gpuEnabledMPI);
    auto packInfo = std::make_shared<lbm_generated::UniformGeneratedGPUPdfPackInfo< GPUPdfField_T >>(ids.pdfFieldGPU);
    communication.addPackInfo(packInfo);
@@ -215,7 +214,7 @@ int main(int argc, char** argv)
    const int streamLowPriority = 0;
    auto defaultStream = gpu::StreamRAII::newPriorityStream(streamLowPriority);
 
-   SweepTimeloop timeLoop(blocks->getBlockStorage(), timesteps);
+   SweepTimeloop timeLoop(blocks->getBlockStorage(), setup.timesteps);
    timeLoop.add() << BeforeFunction(communication.getCommunicateFunctor(), "communication")
                   << Sweep(boundaryCollection.getSweep(BoundaryCollection_T::ALL, defaultStream), "Boundary Conditions");
    timeLoop.add() << Sweep(sweepCollection.streamCollide(SweepCollection_T::ALL, defaultStream), "LBM StreamCollide");
@@ -225,38 +224,81 @@ int main(int argc, char** argv)
    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
    // VTK
-   auto vtkParameters          = config->getOneBlock("VTKWriter");
-   const uint_t vtkWriteFrequency = vtkParameters.getParameter< uint_t >("vtkWriteFrequency", 0);
-   if (vtkWriteFrequency > 0)
-   {
-      auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out",
-                                                      "simulation_step", false, true, true, false, 0);
-      auto velWriter = make_shared< field::VTKWriter< VelocityField_T, float32 > >(ids.velocityField, "vel");
-      vtkOutput->addCellDataWriter(velWriter);
+   auto VTKWriter = config->getOneBlock("VTKWriter");
+   const uint_t vtkWriteFrequency       = VTKWriter.getParameter< uint_t >("vtkWriteFrequency", 0);
+   const uint_t vtkGhostLayers          = VTKWriter.getParameter< uint_t >("vtkGhostLayers", 0);
+   const bool amrFileFormat             = VTKWriter.getParameter< bool >("amrFileFormat", false);
+   const bool oneFilePerProcess         = VTKWriter.getParameter< bool >("oneFilePerProcess", false);
+   const real_t samplingResolution      = VTKWriter.getParameter< real_t >("samplingResolution", real_c(-1.0));
+   const uint_t initialWriteCallsToSkip = VTKWriter.getParameter< uint_t >("initialWriteCallsToSkip", uint_c(0.0));
+   const real_t sliceThickness          = VTKWriter.getParameter< real_t >("sliceThickness", real_c(1.0));
+
+   auto finalDomain = blocks->getDomain();
+   if (vtkWriteFrequency > 0){
+      const std::string vtkFolder = "VTKPorousMediaRE_" + std::to_string(uint64_c(setup.reynoldsNumber));
+      auto vtkOutput =
+         vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, vtkGhostLayers, false, vtkFolder,
+                                        "simulation_step", false, true, true, false, 0, amrFileFormat, oneFilePerProcess);
+
+      vtkOutput->setInitialWriteCallsToSkip(initialWriteCallsToSkip);
 
       vtkOutput->addBeforeFunction([&]() {
-         for (auto& block : *blocks)
+         for (auto& block : *blocks){
             sweepCollection.calculateMacroscopicParameters(&block);
+         }
+
+#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT)
          gpu::fieldCpy< VelocityField_T, gpu::GPUField< VelocityField_T::value_type > >(blocks, ids.velocityField, ids.velocityFieldGPU);
+         gpu::fieldCpy< ScalarField_T, gpu::GPUField< ScalarField_T::value_type > >(blocks, ids.densityField, ids.densityFieldGPU);
+#endif
       });
 
-      if (vtkParameters.getParameter< bool >("flag"))
-      {
+      vtkOutput->setSamplingResolution(samplingResolution);
+
+      field::FlagFieldCellFilter<FlagField_T> fluidFilter( ids.flagField );
+      fluidFilter.addFlag( setup.obstacleUID );
+      vtkOutput->addCellExclusionFilter(fluidFilter);
+
+
+      if (VTKWriter.getParameter< bool >("writeOnlySlice", true)){
+         const AABB sliceXY(finalDomain.xMin(), finalDomain.yMin(), finalDomain.center()[2] - sliceThickness * setup.dxF,
+                            finalDomain.xMax(), finalDomain.yMax(), finalDomain.center()[2] + sliceThickness * setup.dxF);
+         vtkOutput->addCellInclusionFilter(vtk::AABBCellFilter(sliceXY));
+
+         if (VTKWriter.getParameter< bool >("writeXZSlice", false)){
+            const AABB sliceXZ(finalDomain.xMin(), finalDomain.center()[1] - setup.dxF, finalDomain.zMin(),
+                               finalDomain.xMax(), finalDomain.center()[1] + setup.dxF, finalDomain.zMax());
+            vtkOutput->addCellInclusionFilter(vtk::AABBCellFilter(sliceXZ));
+         }
+      }
+
+      if (VTKWriter.getParameter< bool >("velocity")){
+         if (VTKWriter.getParameter< bool >("writeVelocityAsMagnitude")){
+            auto velWriter = make_shared< field::VTKMagnitudeWriter< VelocityField_T, float32 > >(ids.velocityField, "velocityMagnitude");
+            vtkOutput->addCellDataWriter(velWriter);
+         }
+         else{
+            auto velWriter = make_shared< field::VTKWriter< VelocityField_T, float32 > >(ids.velocityField, "velocity");
+            vtkOutput->addCellDataWriter(velWriter);
+         }
+
+      }
+      if (VTKWriter.getParameter< bool >("density")){
+         auto densityWriter = make_shared< field::VTKWriter< ScalarField_T, float32 > >(ids.densityField, "density");
+         vtkOutput->addCellDataWriter(densityWriter);
+      }
+      if (VTKWriter.getParameter< bool >("flag")){
          auto flagWriter = make_shared< field::VTKWriter< FlagField_T > >(ids.flagField, "flag");
          vtkOutput->addCellDataWriter(flagWriter);
       }
-
       timeLoop.addFuncAfterTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
    }
 
       // log remaining time
-   auto loggingParameters= config->getOneBlock("Logging");
-   const real_t remainingTimeLoggerFrequency =
-      loggingParameters.getParameter< real_t >("remainingTimeLoggerFrequency", 3.0); // in seconds
-   if (uint_c(remainingTimeLoggerFrequency) > 0)
-   {
+   
+   if (uint_c(setup.remainingTimeLoggerFrequency) > 0){
       timeLoop.addFuncAfterTimeStep(
-         timing::RemainingTimeLogger(timeLoop.getNrOfTimeSteps(), remainingTimeLoggerFrequency),
+         timing::RemainingTimeLogger(timeLoop.getNrOfTimeSteps(), setup.remainingTimeLoggerFrequency),
          "remaining time logger");
    }
 
@@ -266,8 +308,8 @@ int main(int argc, char** argv)
    WALBERLA_GPU_CHECK(gpuPeekAtLastError())
 #endif
 
-   const lbm_generated::PerformanceEvaluation< FlagField_T > performance(blocks, ids.flagField, fluidFlagUID);
-   field::CellCounter< FlagField_T > fluidCells(blocks, ids.flagField, fluidFlagUID);
+   const lbm_generated::PerformanceEvaluation< FlagField_T > performance(blocks, ids.flagField, setup.fluidUID);
+   field::CellCounter< FlagField_T > fluidCells(blocks, ids.flagField, setup.fluidUID);
    fluidCells();
 
    WALBERLA_LOG_INFO_ON_ROOT("Starting Simulation")
@@ -291,7 +333,7 @@ int main(int argc, char** argv)
    WALBERLA_LOG_INFO_ON_ROOT("Simulation finished")
    double time = double_c(simTimer.max());
    WALBERLA_MPI_SECTION() { walberla::mpi::reduceInplace(time, walberla::mpi::MAX); }
-   performance.logResultOnRoot(timesteps, time);
+   performance.logResultOnRoot(setup.timesteps, time);
 
    const auto reducedTimeloopTiming = timeloopTiming.getReduced();
    WALBERLA_LOG_RESULT_ON_ROOT("Time loop timing:\n" << *reducedTimeloopTiming)
diff --git a/apps/showcases/PorousMediaGPU/PorousMediaGPU.prm b/apps/showcases/PorousMediaGPU/PorousMediaGPU.prm
index 25191e286ea07d8426cbde21d358899168ff734b..2f5c3d2270ee5833374d6895e4b28be4e6bc112a 100644
--- a/apps/showcases/PorousMediaGPU/PorousMediaGPU.prm
+++ b/apps/showcases/PorousMediaGPU/PorousMediaGPU.prm
@@ -10,7 +10,7 @@ Parameters
     waveNumber 6.2831853072;
     scaling 10.0;
 
-    timesteps 50001;
+    timesteps 10;
 
     processMemoryLimit 512; // MiB
     innerOuterSplit <1, 1, 1>;
@@ -22,10 +22,13 @@ Parameters
 //! [domainSetup]
 DomainSetup
 {
-    cellsPerBlock < 1536, 192, 192 >;
+    domainSize    < 8, 1, 1 >;
+    cellsPerBlock < 384, 48, 48 >;
     blocks    < 1, 1, 1 >;
     periodic    < false, true, true >;
-    dx 0.00520833333333;
+    refinementLevels 0;
+    numberProcesses 1; // This is for load balancing, overwritten if more than one proc is used
+    blockForestFilestem porousMediaBlockForest;
 }
 //! [domainSetup]
 
@@ -45,16 +48,16 @@ StabilityChecker
 
 VTKWriter
 {
-    vtkWriteFrequency 5000;
+    vtkWriteFrequency 5;
     vtkGhostLayers 0;
     velocity true;
-    density true;
-    flag true;
-    omega false;
+    writeVelocityAsMagnitude true;
+    density false;
+    flag false;
     writeOnlySlice true;
     sliceThickness 1;
     writeXZSlice false;
-    amrFileFormat true;
+    amrFileFormat false;
     oneFilePerProcess false;
     samplingResolution -1;
     initialWriteCallsToSkip 0;
@@ -63,9 +66,10 @@ VTKWriter
 Logging
 {
     logLevel info;  // info progress detail tracing
-    writeSetupForestAndReturn true; // When only one process is used the decomposition is writen and the program terminates
+    writeSetupForestAndReturn false; // When only one process is used the decomposition is writen and the program terminates
     readSetupFromFile false;
     remainingTimeLoggerFrequency 20; // in seconds
+    writeSurfaceMeshOfTPMS false;
 }
 
 Evaluation
diff --git a/apps/showcases/PorousMediaGPU/Setup.h b/apps/showcases/PorousMediaGPU/Setup.h
new file mode 100644
index 0000000000000000000000000000000000000000..889b8bd27a6a05888edbd68677edd670fd86f396
--- /dev/null
+++ b/apps/showcases/PorousMediaGPU/Setup.h
@@ -0,0 +1,145 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file Setup.h
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+#pragma once
+
+#include "core/config/Config.h"
+#include "core/DataTypes.h"
+#include "core/math/Vector3.h"
+
+#include <string>
+
+#include "Types.h"
+
+namespace walberla
+{
+struct Setup
+{
+
+   Setup(const Config::BlockHandle & parameters, const Config::BlockHandle & domainParameters,
+         const Config::BlockHandle & logging, std::map<std::string, std::string>& infoMap)
+   {
+      blockForestFilestem = domainParameters.getParameter< std::string >("blockForestFilestem", "blockforest");
+      numProcesses        = domainParameters.getParameter< uint_t >("numberProcesses");
+      domainSize          = domainParameters.getParameter< Vector3< real_t > >("domainSize");
+      cellsPerBlock       = domainParameters.getParameter< Vector3< uint_t > >("cellsPerBlock");
+      rootBlocks          = domainParameters.getParameter< Vector3< uint_t > >("blocks");
+      cells               = Vector3<uint_t>(rootBlocks[0] * cellsPerBlock[0], rootBlocks[1] * cellsPerBlock[1], rootBlocks[2] * cellsPerBlock[2]);
+      periodic           = domainParameters.getParameter< Vector3< bool > >("periodic");
+      refinementLevels    = domainParameters.getParameter< uint_t >("refinementLevels");
+
+      const real_t coarseMeshSize = domainSize[0] / real_c(rootBlocks[0] * cellsPerBlock[0]);
+      WALBERLA_CHECK_FLOAT_EQUAL_EPSILON(coarseMeshSize, domainSize[1] / real_c(rootBlocks[1] * cellsPerBlock[1]), real_c(1e-12), "Resulting resolution is not equal in x and y direxction. This is not supported");
+      WALBERLA_CHECK_FLOAT_EQUAL_EPSILON(coarseMeshSize, domainSize[2] / real_c(rootBlocks[2] * cellsPerBlock[2]), real_c(1e-12), "Resulting resolution is not equal in x and z direxction. This is not supported");
+
+      referenceLength   = domainSize[1];
+      reynoldsNumber    = parameters.getParameter< real_t >("reynoldsNumber");
+      referenceVelocity = parameters.getParameter< real_t >("referenceVelocity");
+      latticeVelocity   = parameters.getParameter< real_t >("latticeVelocity");
+
+      waveNumber = parameters.getParameter< real_t >("waveNumber");
+      scaling    = parameters.getParameter< real_t >("scaling");
+
+      const real_t speedOfSound = real_c(real_c(1.0) / std::sqrt( real_c(3.0) ));
+      machNumber = latticeVelocity / speedOfSound;
+      viscosity  = real_c((latticeVelocity * referenceLength) / reynoldsNumber);
+      omega      = real_c(real_c(1.0) / (real_c(3.0) * viscosity + real_c(0.5)));
+
+      rho = real_c(1.0);
+      dxC = coarseMeshSize;
+      dxF = real_c(coarseMeshSize) / real_c( 1 << refinementLevels );
+      dt  = latticeVelocity / referenceVelocity * coarseMeshSize;
+
+      conversionFactor = dxC / dt;
+
+      kinViscosity = viscosity * coarseMeshSize * coarseMeshSize / dt;
+
+      timesteps = parameters.getParameter< uint_t >("timesteps");
+      numGhostLayers = refinementLevels == 0 ? uint_c(1) : uint_c(2);
+
+      valuesPerCell = (Stencil_T::Q + VelocityField_T::F_SIZE + uint_c(1) * ScalarField_T::F_SIZE);
+      memoryPerCell = memory_t(valuesPerCell * sizeof(PdfField_T::value_type) + uint_c(1));
+      processMemoryLimit = parameters.getParameter< memory_t >( "processMemoryLimit", memory_t( 512 ) ) * memory_t( 1024 * 1024 );
+
+      stencil          = infoMap["stencil"];
+      streamingPattern = infoMap["streamingPattern"];
+      collisionModel   = infoMap["collisionOperator"];
+
+      fluidUID    = FlagUID("Fluid");
+      obstacleUID = FlagUID("NoSlip");
+      inflowUID   = FlagUID("UBB");
+      outflowUID  = FlagUID("Outflow");
+
+      writeSetupForestAndReturn    = logging.getParameter< bool >("writeSetupForestAndReturn", false);
+      remainingTimeLoggerFrequency = logging.getParameter< real_t >("remainingTimeLoggerFrequency", 3.0);
+   }
+
+   std::string blockForestFilestem;
+   uint_t numProcesses;
+
+   Vector3<uint_t> rootBlocks;
+   Vector3<uint_t> cells;
+   Vector3<real_t> domainSize;
+
+   Vector3< bool > periodic;
+   uint_t refinementLevels;
+
+   Vector3< uint_t > cellsPerBlock;
+   uint_t numGhostLayers;
+
+   real_t referenceVelocity;
+   real_t referenceLength;
+   real_t latticeVelocity;
+
+   real_t waveNumber;
+   real_t scaling;
+
+   real_t machNumber;
+   real_t reynoldsNumber;
+
+   real_t viscosity;
+   real_t kinViscosity;
+   real_t omega;
+   real_t rho;
+   real_t dxC;
+   real_t dxF;
+   real_t dt;
+
+   real_t conversionFactor;
+
+   uint_t timesteps;
+
+   uint_t valuesPerCell;
+   memory_t memoryPerCell;
+   memory_t processMemoryLimit;
+
+   std::string stencil;
+   std::string streamingPattern;
+   std::string collisionModel;
+
+   FlagUID fluidUID;
+   FlagUID obstacleUID;
+   FlagUID inflowUID;
+   FlagUID outflowUID;
+
+   bool writeSetupForestAndReturn;
+   real_t remainingTimeLoggerFrequency;
+};
+
+}
\ No newline at end of file
diff --git a/src/field/vtk/VTKWriter.h b/src/field/vtk/VTKWriter.h
index 9b2232908b09766e9296c5529ff5dc205ae3a1d9..436c4aab145e8f96c8cd2b14019f0ee121f884d3 100644
--- a/src/field/vtk/VTKWriter.h
+++ b/src/field/vtk/VTKWriter.h
@@ -105,7 +105,7 @@ class VTKWriter : public vtk::BlockCellDataWriter< OutputType,
 public:
    using OutputTrait = VectorTrait<typename Field_T::value_type>;
 
-   using base_t = vtk::BlockCellDataWriter<OutputType, OutputTrait::F_SIZE * Field_T::F_SIZE>;
+   using base_t = vtk::BlockCellDataWriter<OutputType, OutputTrait::F_SIZE * Field_T::F_SIZE>; 
 
    VTKWriter( const ConstBlockDataID bdid, const std::string& id ) :
       base_t( id ), bdid_( bdid ), field_( nullptr ) {}
@@ -140,6 +140,46 @@ protected:
 
 };
 
+template< typename Field_T, typename OutputType = float >
+class VTKMagnitudeWriter : public vtk::BlockCellDataWriter< OutputType, 1 >
+{
+public:
+
+   using base_t = vtk::BlockCellDataWriter<OutputType, 1>;
+   using type_t = typename Field_T::value_type;
+
+   VTKMagnitudeWriter( const ConstBlockDataID bdid, const std::string& id ) :
+      base_t( id ), bdid_( bdid ), field_( nullptr ) {}
+
+protected:
+
+   void configure() override {
+      WALBERLA_ASSERT_NOT_NULLPTR( this->block_ );
+      field_ = this->block_->template getData< Field_T >( bdid_ );
+   }
+
+   OutputType evaluate( const cell_idx_t x, const cell_idx_t y, const cell_idx_t z, const cell_idx_t /*f*/ ) override
+   {
+      WALBERLA_ASSERT_NOT_NULLPTR( field_ );
+      if ( Field_T::F_SIZE == 1 ){
+         return numeric_cast<OutputType>( field_->get(x,y,z,0) );
+      }
+      else if (Field_T::F_SIZE == 2){
+         return numeric_cast<OutputType>( std::sqrt( field_->get(x,y,z,0)*field_->get(x,y,z,0) + field_->get(x,y,z,1)*field_->get(x,y,z,1) ));
+      }
+      else if (Field_T::F_SIZE == 3){
+         return numeric_cast<OutputType>( std::sqrt( field_->get(x,y,z,0)*field_->get(x,y,z,0) + field_->get(x,y,z,1)*field_->get(x,y,z,1) + field_->get(x,y,z,2)*field_->get(x,y,z,2) ));
+      }
+      else{
+         WALBERLA_ABORT("Only fields with 1, 2 or 3 entries per cell are supported");
+      }
+   }
+
+   const ConstBlockDataID bdid_;
+   const Field_T * field_;
+
+}; // class VTKMagnitudeWriter
+