diff --git a/apps/showcases/CMakeLists.txt b/apps/showcases/CMakeLists.txt
index 9426ce664d2cba8b9f2e64b17e5d6776bfaa671f..5b64c3487011c7dc3338a8883bba4a5cfa795ecf 100644
--- a/apps/showcases/CMakeLists.txt
+++ b/apps/showcases/CMakeLists.txt
@@ -11,6 +11,7 @@ if ( WALBERLA_BUILD_WITH_CODEGEN)
 
    add_subdirectory( Antidunes )
    add_subdirectory( FlowAroundSphere )
+   add_subdirectory( Channel )
 
    if( WALBERLA_BUILD_WITH_HDF5)
       add_subdirectory( PorousMediaGPU )
diff --git a/apps/showcases/Channel/CMakeLists.txt b/apps/showcases/Channel/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d5974e7970737cd62cd4cecf81be9ab2c9db6757
--- /dev/null
+++ b/apps/showcases/Channel/CMakeLists.txt
@@ -0,0 +1,30 @@
+waLBerla_link_files_to_builddir( *.py )
+waLBerla_link_files_to_builddir( *.prm )
+waLBerla_link_files_to_builddir( *.png )
+waLBerla_link_files_to_builddir( *.obj )
+waLBerla_link_files_to_builddir( *.stl )
+waLBerla_link_files_to_builddir( *.mtl )
+
+waLBerla_generate_target_from_python(NAME ChannelCodeGen
+        FILE Channel.py
+        OUT_FILES ChannelSweepCollection.h ChannelSweepCollection.${CODEGEN_FILE_SUFFIX}
+        ChannelStorageSpecification.h ChannelStorageSpecification.${CODEGEN_FILE_SUFFIX}
+        NoSlip.h NoSlip.${CODEGEN_FILE_SUFFIX}
+        UBB.h UBB.${CODEGEN_FILE_SUFFIX}
+        Outflow.h Outflow.${CODEGEN_FILE_SUFFIX}
+        ChannelBoundaryCollection.h
+        ChannelInfoHeader.h
+        ChannelStaticDefines.h)
+
+if (WALBERLA_BUILD_WITH_CUDA OR WALBERLA_BUILD_WITH_HIP)
+        waLBerla_add_executable ( NAME Channel
+                FILES Channel.cpp
+                DEPENDS blockforest boundary core field gpu lbm_generated postprocessing stencil timeloop vtk ChannelCodeGen )
+else()
+        waLBerla_add_executable ( NAME Channel
+                FILES Channel.cpp
+                DEPENDS blockforest boundary core field lbm_generated postprocessing stencil timeloop vtk ChannelCodeGen )
+
+endif(WALBERLA_BUILD_WITH_CUDA OR WALBERLA_BUILD_WITH_HIP)
+
+
diff --git a/apps/showcases/Channel/Channel.cpp b/apps/showcases/Channel/Channel.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f0f4ce5bbd37ee21176988bb1d67e110084d5334
--- /dev/null
+++ b/apps/showcases/Channel/Channel.cpp
@@ -0,0 +1,520 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file PorousMediaGPU.cpp
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#include "blockforest/Initialization.h"
+#include "blockforest/SetupBlock.h"
+#include "blockforest/SetupBlockForest.h"
+#include "blockforest/loadbalancing/StaticCurve.h"
+#include "blockforest/StructuredBlockForest.h"
+#include "blockforest/communication/NonUniformBufferedScheme.h"
+
+#include "core/Abort.h"
+#include "core/DataTypes.h"
+#include "core/SharedFunctor.h"
+#include "core/debug/CheckFunctions.h"
+#include "core/logging/Logging.h"
+#include "core/math/Vector3.h"
+#include "core/math/Constants.h"
+#include "core/Environment.h"
+#include "core/mpi/MPIManager.h"
+#include "core/MemoryUsage.h"
+#include "core/mpi/Reduce.h"
+#include "core/timing/RemainingTimeLogger.h"
+#include "core/timing/TimingPool.h"
+
+#include "field/AddToStorage.h"
+#include "field/CellCounter.h"
+#include "field/FlagField.h"
+#include "field/StabilityChecker.h"
+#include "field/adaptors/AdaptorCreators.h"
+#include "field/iterators/FieldIterator.h"
+#include "field/vtk/VTKWriter.h"
+#include "field/vtk/FlagFieldCellFilter.h"
+
+#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT)
+#   include "gpu/AddGPUFieldToStorage.h"
+#   include "gpu/DeviceSelectMPI.h"
+#   include "gpu/ErrorChecking.h"
+#   include "gpu/FieldCopy.h"
+#   include "gpu/HostFieldAllocator.h"
+#   include "gpu/ParallelStreams.h"
+#   include "gpu/communication/NonUniformGPUScheme.h"
+#   include "gpu/communication/UniformGPUScheme.h"
+#endif
+
+#include "geometry/InitBoundaryHandling.h"
+#include "geometry/mesh/TriangleMeshIO.h"
+
+#include "lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h"
+#include "lbm_generated/evaluation/PerformanceEvaluation.h"
+#include "lbm_generated/field/AddToStorage.h"
+#include "lbm_generated/field/PdfField.h"
+#include "lbm_generated/refinement/BasicRecursiveTimeStep.h"
+
+#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT)
+#   include "lbm_generated/gpu/AddToStorage.h"
+#   include "lbm_generated/gpu/BasicRecursiveTimeStepGPU.h"
+#   include "lbm_generated/gpu/GPUPdfField.h"
+#   include "lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.h"
+#   include "lbm_generated/gpu/UniformGeneratedGPUPdfPackInfo.h"
+#endif
+
+#include "timeloop/SweepTimeloop.h"
+#include "vtk/VTKOutput.h"
+
+#include <cstdlib>
+#include <iostream>
+#include <memory>
+
+#include "ChannelStaticDefines.h"
+#include "ChannelInfoHeader.h"
+
+using namespace walberla;
+using FlagField_T          = FlagField< uint8_t >;
+
+using StorageSpecification_T = lbm::ChannelStorageSpecification;
+using Stencil_T              = StorageSpecification_T::Stencil;
+using CommunicationStencil_T = StorageSpecification_T::CommunicationStencil;
+using BoundaryCollection_T = lbm::ChannelBoundaryCollection< FlagField_T >;
+using SweepCollection_T = lbm::ChannelSweepCollection;
+
+#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT)
+using PdfField_T = lbm_generated::PdfField< StorageSpecification_T >;
+using GPUPdfField_T = lbm_generated::GPUPdfField< StorageSpecification_T >;
+using gpu::communication::NonUniformGPUScheme;
+using gpu::communication::UniformGPUScheme;
+
+using lbm_generated::NonuniformGeneratedGPUPdfPackInfo;
+using lbm_generated::UniformGeneratedGPUPdfPackInfo;
+using RefinementTimeStep = lbm_generated::BasicRecursiveTimeStepGPU< GPUPdfField_T, SweepCollection_T, BoundaryCollection_T >;
+#else
+using PdfField_T = lbm_generated::PdfField< StorageSpecification_T >;
+using blockforest::communication::NonUniformBufferedScheme;
+
+using lbm_generated::NonuniformGeneratedPdfPackInfo;
+using RefinementTimeStep = lbm_generated::BasicRecursiveTimeStep< PdfField_T, SweepCollection_T, BoundaryCollection_T >;
+#endif
+
+using macroFieldType = VelocityField_T::value_type;
+using RefinementSelectionFunction = std::function<void (SetupBlockForest &)>;
+
+struct IDs
+{
+   BlockDataID pdfField;
+   BlockDataID velocityField;
+   BlockDataID densityField;
+   BlockDataID flagField;
+
+   BlockDataID pdfFieldGPU;
+   BlockDataID velocityFieldGPU;
+   BlockDataID densityFieldGPU;
+};
+
+void createBlockForest(shared_ptr< BlockForest >& bfs, std::shared_ptr<Config>& config, const RefinementSelectionFunction& refinementSelection)
+{
+   auto domainParameters  = config->getOneBlock("DomainSetup");
+
+   WALBERLA_LOG_INFO_ON_ROOT("Generating SetupBlockForest...")
+
+   const uint_t numProcesses = uint_c(mpi::MPIManager::instance()->numProcesses());
+   const std::string blockForestFilestem = "Channel";
+
+   SetupBlockForest setupBfs;
+   setupBfs.addRefinementSelectionFunction(refinementSelection);
+
+   Vector3< uint_t > cellsPerBlock = domainParameters.getParameter< Vector3< uint_t > >("cellsPerBlock");
+   Vector3< real_t > domainSize = domainParameters.getParameter< Vector3< real_t > >("domainSize");
+   const real_t coarseMeshSize = domainParameters.getParameter< real_t >("resolution");
+   Vector3< bool >  periodic    = domainParameters.getParameter< Vector3< bool > >("periodic");
+   uint_t refinementLevels      = domainParameters.getParameter< uint_t >("refinementLevels");
+
+   Vector3<uint_t> rootBlocks;
+   rootBlocks[0] = uint_c(std::ceil( (domainSize[0] / coarseMeshSize) / real_c(cellsPerBlock[0])));
+   rootBlocks[1] = uint_c(std::ceil( (domainSize[1] / coarseMeshSize) / real_c(cellsPerBlock[1])));
+   rootBlocks[2] = uint_c(std::ceil( (domainSize[2] / coarseMeshSize) / real_c(cellsPerBlock[2])));
+
+   const AABB domain(real_c(0.0), real_c(0.0), real_c(0.0), domainSize[0], domainSize[1], domainSize[2]);
+   setupBfs.addWorkloadMemorySUIDAssignmentFunction(blockforest::uniformWorkloadAndMemoryAssignment);
+   setupBfs.init(domain, rootBlocks[0], rootBlocks[1], rootBlocks[2],
+                         periodic[0], periodic[1], periodic[2]);
+   setupBfs.balanceLoad(blockforest::StaticLevelwiseCurveBalanceWeighted(), numProcesses);
+   bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs);
+
+   WALBERLA_LOG_INFO_ON_ROOT("===========================  BLOCK FOREST STATISTICS ============================");
+   WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << setupBfs.getNumberOfBlocks())
+   for (uint_t level = 0; level <= refinementLevels; level++){
+      const uint_t numberOfBlocks = setupBfs.getNumberOfBlocks(level);
+      WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << numberOfBlocks)
+   }
+
+   const real_t avgBlocksPerProc = real_c(setupBfs.getNumberOfBlocks()) / real_c(setupBfs.getNumberOfProcesses());
+   WALBERLA_LOG_INFO_ON_ROOT("Average blocks per process: " << avgBlocksPerProc);
+
+   const uint_t blockCells        = cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2];;
+   const uint_t totalNumberCells     = setupBfs.getNumberOfBlocks() * blockCells;
+   const real_t averageCellsPerGPU   = avgBlocksPerProc * real_c(blockCells);
+
+   const uint_t PDFsPerCell            = StorageSpecification_T::inplace ? Stencil_T::Q : 2 * Stencil_T::Q;
+   const uint_t valuesPerCell          = (PDFsPerCell + VelocityField_T::F_SIZE + ScalarField_T::F_SIZE);
+   const uint_t sizePerValue           = sizeof(StorageSpecification_T::value_type);
+   const double expectedMemory         = double_c(totalNumberCells * valuesPerCell * sizePerValue) * 1e-9;
+
+   WALBERLA_LOG_INFO_ON_ROOT( "Total number of cells will be " << totalNumberCells << " fluid cells (in total on all levels)")
+   WALBERLA_LOG_INFO_ON_ROOT( "Expected total memory demand will be " << expectedMemory << " GB")
+
+   WALBERLA_LOG_INFO_ON_ROOT("=================================================================================")
+
+   if(mpi::MPIManager::instance()->numProcesses() > 1)
+      return;
+
+   auto logging           = config->getOneBlock("Logging");
+   bool writeSetupForestAndReturn    = logging.getParameter< bool >("writeSetupForestAndReturn", false);
+
+   if(writeSetupForestAndReturn){
+      std::ostringstream oss;
+      oss << blockForestFilestem << ".bfs";
+      setupBfs.saveToFile(oss.str().c_str());
+
+      setupBfs.writeVTKOutput(blockForestFilestem);
+   }
+}
+
+void refineWalls(SetupBlockForest& forest, const uint_t level)
+{
+   auto domain = forest.getDomain();
+   const AABB minAABB(domain.xMin(), domain.yMin(), domain.zMin(), domain.xMax(), domain.yMin() + 1.0, domain.zMax());
+
+   for (auto block = forest.begin(); block != forest.end(); ++block){
+      const AABB& aabb = block->getAABB();
+
+      if (block->getLevel() < level && minAABB.intersects(aabb) ){
+         block->setMarker(true);
+      }
+   }
+}
+
+void defineObstacles(const shared_ptr< StructuredBlockStorage >& blocks, const BlockDataID flagFieldID, const FlagUID obstacleUID )
+{
+   auto domain = blocks->getDomain();
+   const real_t xMin = domain.xMin();
+   const real_t xMax = domain.xMax();
+
+   for (auto bIt = blocks->begin(); bIt != blocks->end(); ++bIt){
+      Block& b             = dynamic_cast< Block& >(*bIt);
+      const auto flagField    = b.getData< FlagField_T >(flagFieldID);
+      const auto boundaryFlag = flagField->getOrRegisterFlag(obstacleUID);
+
+      for( auto it = flagField->beginWithGhostLayer(1); it != flagField->end(); ++it ){
+         Vector3< real_t > cellCenter = blocks->getBlockLocalCellCenter( b, it.cell() );
+         blocks->mapToPeriodicDomain(cellCenter);
+
+         if(cellCenter[0] > 1.0 && cellCenter[0] < 2.0 && cellCenter[1] > 1.2 && cellCenter[1] < 2.0 && cellCenter[2] > 1.2 && cellCenter[2] < 2.0 ){
+            addFlag(flagField->get(it.x(), it.y(), it.z()), boundaryFlag);
+
+         }
+      }
+   }
+}
+
+int main(int argc, char** argv){
+   Environment env( argc, argv );
+   mpi::MPIManager::instance()->useWorldComm();
+   #if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT)
+      gpu::selectDeviceBasedOnMpiRank();
+   #endif
+
+   auto config = env.config();
+
+   //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+   ///                                        SETUP AND CONFIGURATION                                             ///
+   //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+   auto parameters        = config->getOneBlock("Parameters");
+   auto loggingParameters = config->getOneBlock("Logging");
+   auto boundariesConfig  = config->getBlock("Boundaries");
+   auto domainParameters  = config->getOneBlock("DomainSetup");
+
+
+   //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+   ///                                      Parameters and initialisation                                         ///
+   //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+   Vector3< real_t > domainSize = domainParameters.getParameter< Vector3< real_t > >("domainSize");
+   const real_t coarseMeshSize = domainParameters.getParameter< real_t >("resolution");
+
+   const real_t reynoldsNumber = parameters.getParameter< real_t >("reynoldsNumber");
+   const real_t referenceVelocity = parameters.getParameter< real_t >("referenceVelocity");
+   const real_t drivingForce = parameters.getParameter< real_t >("drivingForce");
+   
+   const real_t referenceVelocity_L = parameters.getParameter< real_t >("latticeVelocity");
+   const real_t referenceLength_L = (domainSize[0] / coarseMeshSize) / real_c(2.0);
+   const real_t dt = referenceVelocity_L / referenceVelocity * coarseMeshSize;
+   const real_t viscosity_L = real_c((referenceVelocity_L * referenceLength_L) / reynoldsNumber);
+   const real_t omega = real_c(real_c(1.0) / (real_c(3.0) * viscosity_L + real_c(0.5)));
+   
+   const bool initialiseWithVelocity = parameters.getParameter< bool >("initialiseWithVelocity");
+   const uint_t timesteps = parameters.getParameter< uint_t >("timesteps");
+
+   bool writeSetupForestAndReturn = loggingParameters.getParameter< bool >("writeSetupForestAndReturn", false);
+   if (uint_c(MPIManager::instance()->numProcesses()) > 1) writeSetupForestAndReturn = false;
+
+   const uint_t refinementLevels = domainParameters.getParameter< uint_t >("refinementLevels");
+   Vector3< uint_t > cellsPerBlock = domainParameters.getParameter< Vector3< uint_t > >("cellsPerBlock");
+   
+   auto refinementSelection = std::bind(refineWalls, std::placeholders::_1, refinementLevels);
+
+   shared_ptr< BlockForest > bfs;
+   createBlockForest(bfs, config, refinementSelection);
+
+   WALBERLA_LOG_INFO_ON_ROOT( "Benchmark run data:"
+                           "\n- simulation parameters:"   << std::setprecision(16) <<
+                           "\n   + collision model:     " << infoMap["collisionOperator"] <<
+                           "\n   + stencil:             " << infoMap["stencil"] <<
+                           "\n   + streaming:           " << infoMap["streamingPattern"] <<
+                           "\n   + compressible:        " << ( StorageSpecification_T::compressible ? "yes" : "no" ) <<
+                           "\n   + mesh levels:         " << refinementLevels + uint_c(1) <<
+                           "\n   + resolution:          " << coarseMeshSize << " - on the coarsest grid" <<
+                           "\n- simulation properties:  "
+                           "\n   + viscosity LB:        " << viscosity_L  << " [dx*dx/dt] (on the coarsest grid)" <<
+                           "\n   + omega:               " << omega << " (on the coarsest grid)" <<
+                           "\n   + inflow velocity:     " << referenceVelocity << " [m/s]" <<
+                           "\n   + lattice velocity:    " << referenceVelocity_L << " [dx/dt]" <<
+                           "\n   + Reynolds number:     " << reynoldsNumber <<
+                           "\n   + dt (coarsest grid):  " << dt << " [s]"
+                           "\n   + #time steps:         " << timesteps << " (on the coarsest grid, " << ( real_c(1.0) / dt ) << " for 1s of real time)" <<
+                           "\n   + simulation time:     " << ( real_c( timesteps ) * dt ) << " [s]" )
+
+   if (writeSetupForestAndReturn && mpi::MPIManager::instance()->numProcesses() == 1){
+      WALBERLA_LOG_INFO_ON_ROOT("BlockForest has been created and writen to file. Returning program")
+      logging::Logging::printFooterOnStream();
+      return EXIT_SUCCESS;
+   }
+
+   auto blocks = std::make_shared< StructuredBlockForest >(bfs, cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2]);
+   blocks->createCellBoundingBoxes();
+
+   auto finalDomain = blocks->getDomain();
+
+   const StorageSpecification_T StorageSpec = StorageSpecification_T();
+   // Creating fields
+   uint_t step = 0;
+   WALBERLA_LOG_INFO_ON_ROOT(++step << ". Allocating data")
+   IDs ids;
+
+#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT)
+   auto PDFAllocator = make_shared< gpu::HostFieldAllocator< PdfField_T::value_type > >();
+   auto allocator = make_shared< gpu::HostFieldAllocator< VelocityField_T::value_type > >();
+   ids.pdfField = lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, uint_c(2), field::fzyx, PDFAllocator);
+   ids.velocityField = field::addToStorage< VelocityField_T >(blocks, "velocity", real_c(0.0), field::fzyx, uint_c(2), allocator);
+   ids.densityField = field::addToStorage< ScalarField_T >(blocks, "density", real_c(1.0), field::fzyx, uint_c(2), allocator);
+   ids.flagField = field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field", uint_c(3));
+
+   ids.pdfFieldGPU = lbm_generated::addGPUPdfFieldToStorage< PdfField_T >(blocks, ids.pdfField, StorageSpec, "pdfs on GPU", true);
+   ids.velocityFieldGPU = gpu::addGPUFieldToStorage< VelocityField_T >(blocks, ids.velocityField, "velocity on GPU", true);
+   ids.densityFieldGPU = gpu::addGPUFieldToStorage< ScalarField_T >(blocks, ids.densityField, "density on GPU", true);
+
+   WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
+   WALBERLA_GPU_CHECK(gpuPeekAtLastError())
+#else
+   ids.pdfField = lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, uint_c(2), field::fzyx);
+   ids.velocityField = field::addToStorage< VelocityField_T >(blocks, "vel", VelocityField_T::value_type(0.0), field::fzyx, uint_c(2));
+   ids.densityField = field::addToStorage< ScalarField_T >(blocks, "density", ScalarField_T::value_type(1.0), field::fzyx, uint_c(2));
+   ids.flagField = field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field", uint_c(3));
+#endif
+
+
+#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT)
+   SweepCollection_T sweepCollection(blocks, ids.pdfFieldGPU, ids.densityFieldGPU, ids.velocityFieldGPU, drivingForce, omega);
+#else
+   SweepCollection_T sweepCollection(blocks, ids.pdfField, ids.densityField, ids.velocityField, drivingForce, omega);
+#endif
+
+   if(initialiseWithVelocity){
+      for (auto& block : *blocks){
+         auto velocity = block.getData< VelocityField_T >(ids.velocityField);
+         WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(velocity,
+            velocity->get(x, y, z, 0) = referenceVelocity_L;
+         )
+      }
+   }
+
+#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT)
+   gpu::fieldCpy< gpu::GPUField< VelocityField_T::value_type >, VelocityField_T >(blocks, ids.velocityFieldGPU, ids.velocityField);
+#endif
+
+   WALBERLA_LOG_INFO_ON_ROOT(++step << ". Initialising data")
+   for (auto& block : *blocks){
+      sweepCollection.initialise(&block, cell_idx_c(1));
+   }
+
+   //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+   ///                                      LB SWEEPS AND BOUNDARY HANDLING                                       ///
+   //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+   auto fluidUID    = FlagUID("Fluid");
+   auto obstacleUID = FlagUID("NoSlip");
+
+   // Boundaries
+   WALBERLA_LOG_INFO_ON_ROOT(++step << ". Setting boundary conditions")
+
+   geometry::initBoundaryHandling< FlagField_T >(*blocks, ids.flagField, boundariesConfig);
+   defineObstacles(blocks, ids.flagField, obstacleUID);
+   geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, ids.flagField, fluidUID, 2);
+
+
+#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT)
+   BoundaryCollection_T boundaryCollection(blocks, ids.flagField, ids.pdfFieldGPU, fluidUID, referenceVelocity_L, ids.pdfField);
+   WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
+   WALBERLA_GPU_CHECK(gpuPeekAtLastError())
+#else
+   BoundaryCollection_T boundaryCollection(blocks, ids.flagField, ids.pdfField, fluidUID, referenceVelocity_L);
+#endif
+
+
+   //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+   ///                                           COMMUNICATION SCHEME                                             ///
+   //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT)
+   WALBERLA_LOG_INFO_ON_ROOT("Setting up communication...")
+   const bool gpuEnabledMPI       = parameters.getParameter< bool >("gpuEnabledMPI", false);
+
+   auto nonUniformCommunication = std::make_shared< NonUniformGPUScheme< CommunicationStencil_T > >(blocks, gpuEnabledMPI);
+   auto nonUniformPackInfo      = lbm_generated::setupNonuniformGPUPdfCommunication< GPUPdfField_T >(blocks, ids.pdfFieldGPU);
+   nonUniformCommunication->addPackInfo(nonUniformPackInfo);
+#else
+   WALBERLA_LOG_INFO_ON_ROOT("Setting up communication...")
+   auto nonUniformCommunication = std::make_shared< NonUniformBufferedScheme< CommunicationStencil_T > >(blocks);
+   auto nonUniformPackInfo      = lbm_generated::setupNonuniformPdfCommunication< PdfField_T >(blocks, ids.pdfField);
+   nonUniformCommunication->addPackInfo(nonUniformPackInfo);
+
+#endif
+
+   //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+   ///                                          TIME STEP DEFINITIONS                                             ///
+   //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+   SweepTimeloop timeloop(blocks->getBlockStorage(), timesteps);
+
+#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT)
+   auto LBMRefinement = std::make_shared<RefinementTimeStep>(blocks, ids.pdfFieldGPU, sweepCollection, boundaryCollection, nonUniformCommunication, nonUniformPackInfo);
+   LBMRefinement->addRefinementToTimeLoop(timeloop, uint_c(0));
+#else
+   auto LBMRefinement = std::make_shared<RefinementTimeStep>(blocks, ids.pdfField, sweepCollection, boundaryCollection, nonUniformCommunication, nonUniformPackInfo);
+   LBMRefinement->addRefinementToTimeLoop(timeloop);
+#endif
+
+   auto dxF = blocks->dz(refinementLevels);
+   const AABB sliceXY(finalDomain.xMin(), finalDomain.yMin(), finalDomain.center()[2] - dxF,
+                      finalDomain.xMax(), finalDomain.yMax(), finalDomain.center()[2] + dxF);
+
+
+   //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+   ///                                             VTK Output                                                     ///
+   //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+   // VTtimeloop.addFuncAfterTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");K
+   auto VTKWriter = config->getOneBlock("VTKWriter");
+   const uint_t vtkWriteFrequency       = VTKWriter.getParameter< uint_t >("vtkWriteFrequency", 0);
+   const real_t sliceThickness          = VTKWriter.getParameter< real_t >("sliceThickness", real_c(1.0));
+
+   if (vtkWriteFrequency > 0){
+      auto vtkOutput =
+         vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out",
+                                        "simulation_step", false, true, true, false, 0, false, false);
+
+      vtkOutput->addBeforeFunction([&]() {
+         for (auto& block : *blocks){
+            sweepCollection.calculateMacroscopicParameters(&block);
+         }
+      });
+
+
+      field::FlagFieldCellFilter<FlagField_T> fluidFilter( ids.flagField );
+      fluidFilter.addFlag(obstacleUID );
+      vtkOutput->addCellExclusionFilter(fluidFilter);
+
+
+      if (VTKWriter.getParameter< bool >("writeOnlySlice", true)){
+         vtkOutput->addCellInclusionFilter(vtk::AABBCellFilter(sliceXY));
+      }
+
+      if (VTKWriter.getParameter< bool >("velocity")){
+         if (VTKWriter.getParameter< bool >("writeVelocityAsMagnitude")){
+            auto velWriter = make_shared< field::VTKMagnitudeWriter< VelocityField_T, float32 > >(ids.velocityField, "velocityMagnitude");
+            vtkOutput->addCellDataWriter(velWriter);
+         }
+         else{
+            auto velWriter = make_shared< field::VTKWriter< VelocityField_T, float32 > >(ids.velocityField, "velocity");
+            vtkOutput->addCellDataWriter(velWriter);
+         }
+      }
+      if (VTKWriter.getParameter< bool >("density")){
+         auto densityWriter = make_shared< field::VTKWriter< ScalarField_T, float32 > >(ids.densityField, "density");
+         vtkOutput->addCellDataWriter(densityWriter);
+      }
+      if (VTKWriter.getParameter< bool >("flag")){
+         auto flagWriter = make_shared< field::VTKWriter< FlagField_T > >(ids.flagField, "flag");
+         vtkOutput->addCellDataWriter(flagWriter);
+      }
+      timeloop.addFuncAfterTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
+   }
+
+   // log remaining time
+   timeloop.addFuncAfterTimeStep(
+      timing::RemainingTimeLogger(timeloop.getNrOfTimeSteps(), 20),
+      "remaining time logger");
+
+   auto CheckerParameters      = config->getOneBlock("StabilityChecker");
+   const uint_t checkFrequency = CheckerParameters.getParameter< uint_t >("checkFrequency", uint_t(0));
+   if (checkFrequency > 0){
+      auto checkFunction = [](VelocityField_T::value_type value) {  return value < math::abs(VelocityField_T::value_type(10)); };
+      timeloop.addFuncAfterTimeStep(
+         makeSharedFunctor(field::makeStabilityChecker< VelocityField_T, FlagField_T >(
+            config, blocks, ids.velocityField, ids.flagField, fluidUID, checkFunction)),
+         "Stability check");
+   }
+
+   WALBERLA_MPI_BARRIER()
+
+   const lbm_generated::PerformanceEvaluation< FlagField_T > performance(blocks, ids.flagField, fluidUID);
+   field::CellCounter< FlagField_T > fluidCells(blocks, ids.flagField, fluidUID);
+   fluidCells();
+
+   WALBERLA_LOG_INFO_ON_ROOT(++step << ". Starting Simulation with " << timesteps << " timesteps")
+   WcTimingPool timeloopTiming;
+   WcTimer simTimer;
+
+   WALBERLA_MPI_BARRIER()
+
+   simTimer.start();
+   timeloop.run(timeloopTiming);
+   WALBERLA_MPI_BARRIER()
+   simTimer.end();
+   WALBERLA_LOG_INFO_ON_ROOT("Simulation finished")
+   double time = double_c(simTimer.max());
+   WALBERLA_MPI_SECTION() { walberla::mpi::reduceInplace(time, walberla::mpi::MAX); }
+   performance.logResultOnRoot(timesteps, time);
+
+   const auto reducedTimeloopTiming = timeloopTiming.getReduced();
+   WALBERLA_LOG_RESULT_ON_ROOT("Time loop timing:\n" << *reducedTimeloopTiming)
+
+   printResidentMemoryStatistics();
+   return EXIT_SUCCESS;
+}
\ No newline at end of file
diff --git a/apps/showcases/Channel/Channel.prm b/apps/showcases/Channel/Channel.prm
new file mode 100644
index 0000000000000000000000000000000000000000..0c0e5cdb7ab51626d58550247299a1e8b8040d89
--- /dev/null
+++ b/apps/showcases/Channel/Channel.prm
@@ -0,0 +1,56 @@
+Parameters
+{
+    reynoldsNumber 400;
+    referenceVelocity 1.0; // [m / s]
+    latticeVelocity 0.05;
+    initialiseWithVelocity false;
+    drivingForce 0.0;
+
+    timesteps 101;
+
+    // GPU related Parameters, only used if GPU is enabled
+    gpuEnabledMPI false;
+}
+
+DomainSetup
+{
+    cellsPerBlock < 16, 16, 16 >;
+    domainSize    < 6.4, 3.2, 3.2 >;
+    resolution    0.1;
+    periodic    < false, false, true >;
+    refinementLevels 0;
+}
+//! [domainSetup]
+
+Boundaries
+{
+    Border { direction W;    walldistance -1;  flag UBB; }
+    Border { direction E;    walldistance -1;  flag Outflow; }
+    Border { direction N;    walldistance -1;  flag NoSlip; }
+    Border { direction S;    walldistance -1;  flag NoSlip; }
+}
+
+StabilityChecker
+{
+    checkFrequency 0;
+    streamOutput   false;
+    vtkOutput      true;
+}
+
+VTKWriter
+{
+    vtkWriteFrequency 100;
+    velocity true;
+    writeVelocityAsMagnitude false;
+    density false;
+    flag false;
+    writeOnlySlice true;
+    initialWriteCallsToSkip 0;
+}
+
+Logging
+{
+    logLevel info;  // info progress detail tracing
+    writeSetupForestAndReturn false; // When only one process is used the decomposition is writen and the program terminates
+    remainingTimeLoggerFrequency 20; // in seconds
+}
\ No newline at end of file
diff --git a/apps/showcases/Channel/Channel.py b/apps/showcases/Channel/Channel.py
new file mode 100644
index 0000000000000000000000000000000000000000..cd3778f3cbd1bedc6d2bfe570f844de66f309b8e
--- /dev/null
+++ b/apps/showcases/Channel/Channel.py
@@ -0,0 +1,117 @@
+import sympy as sp
+from pystencils import Target
+
+from pystencils.field import fields
+from pystencils.simp import insert_aliases, insert_constants
+
+from lbmpy import LBStencil, LBMConfig, LBMOptimisation
+from lbmpy.boundaries.boundaryconditions import NoSlip, NoSlipLinearBouzidi, ExtrapolationOutflow, UBB, FixedDensity
+from lbmpy.creationfunctions import create_lb_collision_rule
+from lbmpy.enums import Method, Stencil, ForceModel
+from lbmpy.flow_statistics import welford_assignments
+
+from pystencils_walberla import CodeGeneration, generate_info_header, generate_sweep
+from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator
+from lbmpy_walberla import RefinementScaling
+
+info_header = """
+#pragma once
+#include <map>
+std::map<std::string, std::string> infoMap{{{{"stencil", "{stencil}"}},
+                                           {{"streamingPattern", "{streaming_pattern}"}}, 
+                                           {{"collisionOperator", "{collision_operator}"}}}};
+"""
+
+omega = sp.symbols("omega")
+inlet_velocity = sp.symbols("u_x")
+force_vector = tuple([sp.Symbol("Fx"), 0.0, 0.0])
+max_threads = 256
+ 
+with CodeGeneration() as ctx:
+    target = Target.GPU if ctx.gpu else Target.CPU
+    name_prefix = "Channel"
+    sweep_params = {'block_size': (128, 1, 1)} if ctx.gpu else {}
+
+    # The application is meant to be compiled with double precision. For single precision, the pdf_dtype can be switched
+    # to float32. In this way the calculations are still performed in double precision while the application can profit
+    # from enhanced performance due to the lower precision of the PDF field
+    dtype = 'float64' if ctx.double_accuracy else 'float32'
+    pdf_dtype = 'float64'
+
+    stencil = LBStencil(Stencil.D3Q19)
+    q = stencil.Q
+    dim = stencil.D
+
+    streaming_pattern = 'aa'
+
+    pdfs = fields(f"pdfs({stencil.Q}): {pdf_dtype}[3D]", layout='fzyx')
+    velocity_field, density_field = fields(f"velocity({dim}), density(1) : {dtype}[{dim}D]", layout='fzyx')
+    mean_velocity = fields(f"mean_velocity({dim}): {dtype}[{stencil.D}D]", layout='fzyx')
+
+    macroscopic_fields = {'density': density_field, 'velocity': velocity_field}
+
+    # method_enum = Method.CENTRAL_MOMENT
+    method_enum = Method.SRT
+    lbm_config = LBMConfig(
+        method=method_enum,
+        stencil=stencil,
+        relaxation_rate=omega,
+        compressible=True,
+        force=force_vector,
+        # subgrid_scale_model=SubgridScaleModel.QR,
+        fourth_order_correction=0.01 if method_enum == Method.CUMULANT and stencil.Q == 27 else False,
+        field_name='pdfs',
+        streaming_pattern=streaming_pattern
+    )
+
+    lbm_opt = LBMOptimisation(cse_global=False, cse_pdfs=False, field_layout="fzyx",
+                              symbolic_field=pdfs)
+
+    collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
+    if lbm_config.method == Method.CUMULANT:
+        collision_rule = insert_constants(collision_rule)
+        collision_rule = insert_aliases(collision_rule)
+    lb_method = collision_rule.method
+
+    no_slip = lbm_boundary_generator(class_name='NoSlip', flag_uid='NoSlip',
+                                     boundary_object=NoSlip(), field_data_type=pdf_dtype)
+ 
+    quadratic_bounce_back = NoSlipLinearBouzidi(calculate_force_on_boundary=True, data_type=dtype)
+    no_slip_interpolated = lbm_boundary_generator(class_name='Obstacle', flag_uid='Obstacle',
+                                                  boundary_object=quadratic_bounce_back,
+                                                  field_data_type=pdf_dtype)
+
+    ubb = lbm_boundary_generator(class_name='UBB', flag_uid='UBB',
+                                 boundary_object=UBB(velocity=(inlet_velocity, 0.0, 0.0), density=1.0, data_type=dtype, dim=dim),
+                                 field_data_type=pdf_dtype)
+
+    outflow_boundary = ExtrapolationOutflow(stencil[4], lb_method, data_type=pdf_dtype)
+    outflow = lbm_boundary_generator(class_name='Outflow', flag_uid='Outflow',
+                                     boundary_object=outflow_boundary,
+                                     field_data_type=pdf_dtype)
+
+    refinement_scaling = RefinementScaling()
+    refinement_scaling.add_standard_relaxation_rate_scaling(omega)
+    refinement_scaling.add_force_scaling(sp.Symbol("Fx"))
+
+    generate_lbm_package(ctx, name=name_prefix, collision_rule=collision_rule,
+                         lbm_config=lbm_config, lbm_optimisation=lbm_opt,
+                         nonuniform=True, refinement_scaling=refinement_scaling, boundaries=[no_slip, ubb, outflow],
+                         macroscopic_fields=macroscopic_fields, gpu_indexing_params=sweep_params,
+                         target=target, data_type=dtype, pdfs_data_type=pdf_dtype,
+                         max_threads=max_threads)
+    
+    field_typedefs = {'VelocityField_T': velocity_field,
+                      'ScalarField_T': density_field}
+
+    # Info header containing correct template definitions for stencil and field
+    generate_info_header(ctx, f'{name_prefix}InfoHeader',
+                         field_typedefs=field_typedefs)
+
+    infoHeaderParams = {
+        'stencil': stencil.name.lower(),
+        'streaming_pattern': streaming_pattern,
+        'collision_operator': lbm_config.method.name.lower(),
+    }
+
+    ctx.write_file(f"{name_prefix}StaticDefines.h", info_header.format(**infoHeaderParams))
\ No newline at end of file
diff --git a/apps/showcases/PorousMediaGPU/InitializerFunctions.cpp b/apps/showcases/PorousMediaGPU/InitializerFunctions.cpp
index 3db223d2095b342163fedec21e3d9ce9dd59e3f1..b0c7c191dd9d82d587fb23de243697da0c8e60ab 100644
--- a/apps/showcases/PorousMediaGPU/InitializerFunctions.cpp
+++ b/apps/showcases/PorousMediaGPU/InitializerFunctions.cpp
@@ -86,7 +86,7 @@ void initHemisphers(const shared_ptr< StructuredBlockStorage >& blocks, const Bl
          if(cellCenter[1] > radius && cellCenter[1] < yMax - radius){
             continue;
          }
-         auto center = getCenterOfHemisphere(cellCenter, radius, hemispheresDistance, yMin, yMax);
+         auto center = getCenterOfHemisphere(cellCenter, hemispheresDistance, yMin, yMax);
          if(hemispheresContains(cellCenter, center, radius)) {
             addFlag(flagField->get(it.x(), it.y(), it.z()), boundaryFlag);
          }
@@ -94,4 +94,53 @@ void initHemisphers(const shared_ptr< StructuredBlockStorage >& blocks, const Bl
    }
 }
 
+void setVelocityFieldsAsmuth(const shared_ptr< StructuredBlockStorage >& blocks,
+                             const Setup& setup, const IDs& ids) {
+
+   const real_t B = real_c(5.5);
+   const real_t kappa = real_c(0.41);
+
+   auto domainSize = blocks->getDomain().max();
+   domainSize /= setup.dxC;
+
+   const real_t frictionVelocity = setup.frictionVelocity;
+
+   for( auto block = blocks->begin(); block != blocks->end(); ++block ) {
+      const uint_t level = blocks->getLevel(*block.get());
+      const real_t levelScaleFactor = real_c(uint_c(1) << level);
+      const real_t dx = setup.dxC / (real_c(1.0) / levelScaleFactor);
+
+      auto * velocityField = block->template getData<VelocityField_T>(ids.velocityField);
+
+      const auto ci = velocityField->xyzSizeWithGhostLayer();
+      for(auto cellIt = ci.begin(); cellIt != ci.end(); ++cellIt) {
+
+         Cell globalCell(*cellIt);
+         blocks->transformBlockLocalToGlobalCell(globalCell, *block);
+         Vector3<real_t> cellCenter;
+         blocks->getCellCenter(cellCenter, globalCell);
+
+         const auto rel_x = cellCenter[0] / domainSize[0];
+         const auto rel_y = cellCenter[1] / domainSize[1];
+         const auto rel_z = cellCenter[2] / domainSize[2];
+
+         const real_t t1 = std::sin(math::pi * 8.0 / dx * rel_z) * std::sin(math::pi * 8.0 / dx * rel_y) + std::sin(math::pi * 8.0 / dx * rel_x);
+         const real_t t2 = real_c(1.0); // (np.power(0.5 * delta - pos, 2.0) + 1.0);
+         const real_t t3 = std::sin(math::pi * 16.0_r / dx * rel_x) * std::sin(math::pi * 8.0_r / dx * rel_y);
+
+         Vector3<real_t> vel;
+
+         vel[0] = setup.latticeVelocity;
+         vel[1] = 5.0_r * frictionVelocity / kappa * t1 / t2;
+         vel[2] = 2.0_r * frictionVelocity / kappa * t3 / (std::pow(rel_y, 2.0_r) + 1.0_r);
+
+         velocityField->get(*cellIt, 0) = vel[0];
+         velocityField->get(*cellIt, 1) = vel[1];
+         velocityField->get(*cellIt, 2) = vel[2];
+
+      }
+   }
+
+} // function setVelocityFieldsHenrik
+
 } // namespace walberla
diff --git a/apps/showcases/PorousMediaGPU/InitializerFunctions.h b/apps/showcases/PorousMediaGPU/InitializerFunctions.h
index 1499a29ac228ffa16c33cd788ed619ca73a90a68..cb69db2eb743ff15e024424b54c5fb68121203c4 100644
--- a/apps/showcases/PorousMediaGPU/InitializerFunctions.h
+++ b/apps/showcases/PorousMediaGPU/InitializerFunctions.h
@@ -26,11 +26,16 @@
 #include "field/communication/PackInfo.h"
 #include "field/vtk/VTKWriter.h"
 
+#include "lbm_generated/refinement/RefinementScaling.h"
+#include "lbm_generated/util.h"
+
 #include "Setup.h"
 #include "util.h"
+#include "Types.h"
 
 namespace walberla{
 void initGrid(const shared_ptr< StructuredBlockStorage >& blocks, const BlockDataID flagFieldID, const Setup& setup);
 void initHemisphers(const shared_ptr< StructuredBlockStorage >& blocks, const BlockDataID flagFieldID, const Setup& setup, const Config::BlockHandle& wallParameters);
 bool hemispheresContains(const Vector3<real_t>& point, const Vector3<real_t>& center, const real_t radiusSquared);
+void setVelocityFieldsAsmuth(const shared_ptr< StructuredBlockStorage >& blocks, const Setup& setup, const IDs& ids);
 } // namespace walberla
diff --git a/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp b/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp
index 84b9d75a4e8ad99a13f9e188044327920c7f002e..387d10542b73a1b0b5e0abe6abd2e208e64bd48e 100644
--- a/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp
+++ b/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp
@@ -124,7 +124,7 @@ class HemisphereWallDistance
       Vector3< real_t > boundary = SbF->getBlockLocalCellCenter( block, boundaryCell );
       Vector3< real_t > fluid    = SbF->getBlockLocalCellCenter( block, fluidCell );
 
-      auto center = getCenterOfHemisphere(boundary, radius_, hemispheresDistance_, yMin, yMax);
+      auto center = getCenterOfHemisphere(boundary, hemispheresDistance_, yMin, yMax);
 
       hemispheresContains(boundary, center, radius_);
 
@@ -287,6 +287,9 @@ int main(int argc, char** argv){
          )
       }
    }
+
+   setVelocityFieldsAsmuth(blocks, setup, ids);
+
    gpu::fieldCpy< gpu::GPUField< VelocityField_T::value_type >, VelocityField_T >(blocks, ids.velocityFieldGPU, ids.velocityField);
 
    for (auto& block : *blocks){
diff --git a/apps/showcases/PorousMediaGPU/Setup.h b/apps/showcases/PorousMediaGPU/Setup.h
index 13fe8a56a355709bd71a3dde4ce04d1d1b79eaba..09d0723dbf70d78463efc97b642ec0f10df56bb1 100644
--- a/apps/showcases/PorousMediaGPU/Setup.h
+++ b/apps/showcases/PorousMediaGPU/Setup.h
@@ -65,6 +65,7 @@ struct Setup
       reynoldsNumber                = parameters.getParameter< real_t >("reynoldsNumber");
       latticeVelocity               = parameters.getParameter< real_t >("latticeVelocity");
       initialiseWithlatticeVelocity = parameters.getParameter< bool >("initialiseWithlatticeVelocity");
+      frictionVelocity              = real_c(0.0);
 
       useGrid               = turbulence.getParameter< bool >("useGrid");
       turbulentInflow       = turbulence.getParameter< bool >("turbulentInflow");
@@ -88,8 +89,7 @@ struct Setup
          machNumber = latticeVelocity / speedOfSound;
          omega      = real_c(real_c(1.0) / (real_c(3.0) * viscosity + real_c(0.5)));
 
-         const real_t frictionVelocity = (reynoldsNumber * viscosity) / latticeReferenceLength;
-
+         frictionVelocity = (reynoldsNumber * viscosity) / latticeReferenceLength;
          drivingForce = calculateDrivingForce(frictionVelocity, latticeReferenceLength);
       }
       else{
@@ -151,6 +151,7 @@ struct Setup
    real_t referenceVelocity;
    real_t referenceLength;
    real_t latticeVelocity;
+   real_t frictionVelocity;
    bool initialiseWithlatticeVelocity;
 
    bool useGrid;
diff --git a/apps/showcases/PorousMediaGPU/roughWall.prm b/apps/showcases/PorousMediaGPU/roughWall.prm
index 23e69c1e165703207d9832e2104f2201c8aaef8b..a23472152e1ed181709a8ab8aa16f4a624a898fa 100644
--- a/apps/showcases/PorousMediaGPU/roughWall.prm
+++ b/apps/showcases/PorousMediaGPU/roughWall.prm
@@ -6,7 +6,7 @@ Parameters
     latticeVelocity 0.05;
     initialiseWithlatticeVelocity true;
 
-    timesteps 800001;
+    timesteps 1;
 
     processMemoryLimit 512; // MiB
     innerOuterSplit <1, 1, 1>;
@@ -63,7 +63,7 @@ VTKWriter
     vtkWriteFrequency 1000;
     vtkGhostLayers 0;
     velocity true;
-    writeVelocityAsMagnitude true;
+    writeVelocityAsMagnitude false;
     density false;
     meanVelocity false;
     flag false;
@@ -73,7 +73,7 @@ VTKWriter
     amrFileFormat false;
     oneFilePerProcess false;
     samplingResolution -1;
-    initialWriteCallsToSkip 500000;
+    initialWriteCallsToSkip 0;
 }
 
 Logging
diff --git a/apps/showcases/PorousMediaGPU/util.cpp b/apps/showcases/PorousMediaGPU/util.cpp
index 514d37d3451c97673a641e7fb5ce96d3bf35218c..bc7e9ceae4cb5808330ebd68c0fe9440055d73be 100644
--- a/apps/showcases/PorousMediaGPU/util.cpp
+++ b/apps/showcases/PorousMediaGPU/util.cpp
@@ -87,7 +87,7 @@ real_t getKarmanPaoEnergy(const real_t k, const real_t alpha, const real_t kappa
     return a1 * a2 / a3 * a4;
 }
 
-Vector3<real_t> getCenterOfHemisphere(const Vector3<real_t>& p, const real_t hemispheresRadius, const real_t hemispheresDistance,
+Vector3<real_t> getCenterOfHemisphere(const Vector3<real_t>& p, const real_t hemispheresDistance,
                                       const real_t domainYMin, const real_t domainYMax){
 
     const real_t spaceX     = hemispheresDistance / real_c(2.0) * math::root_three;
diff --git a/apps/showcases/PorousMediaGPU/util.h b/apps/showcases/PorousMediaGPU/util.h
index 82e804bfd13f008aec7178fcea11fcb149112567..2e05b671b3b9553c8206712cea633ee7850413e3 100644
--- a/apps/showcases/PorousMediaGPU/util.h
+++ b/apps/showcases/PorousMediaGPU/util.h
@@ -31,7 +31,7 @@ Vector3<real_t> summOfFourrierModes(const Vector3<real_t>& p, const Vector3<real
 
 real_t getKarmanPaoEnergy(const real_t k, const real_t alpha, const real_t kappaVKP, const real_t kappaKolmogorov, const real_t uRMS);
 
-Vector3<real_t> getCenterOfHemisphere(const Vector3<real_t>& p, const real_t hemispheresRadius, const real_t hemispheresDistance,
+Vector3<real_t> getCenterOfHemisphere(const Vector3<real_t>& p, const real_t hemispheresDistance,
                                       const real_t domainYMin, const real_t domainYMax);
 
 bool hemispheresContains(const Vector3<real_t>& point, const Vector3<real_t>& center, const real_t radius);
diff --git a/apps/showcases/PorousMediaGPU/utilGPU.cu b/apps/showcases/PorousMediaGPU/utilGPU.cu
index abafc7efac81b36868cf6432a48cd3e6599ff0cc..d6b56626cfe5c67ba92bfedc021648c1ff64e3b0 100644
--- a/apps/showcases/PorousMediaGPU/utilGPU.cu
+++ b/apps/showcases/PorousMediaGPU/utilGPU.cu
@@ -18,7 +18,7 @@
 //
 //======================================================================================================================
 #include "utilGPU.h"
-#include <curand_kernel.h>
+// #include <curand_kernel.h>
 
 namespace walberla
 {
@@ -75,7 +75,7 @@ static __device__ void singleMode(double* rX, double* rY, double* rZ, const doub
 {
 
    const real_t pi2 = 6.28318530717958647693;
-
+/*
    curandStatePhilox4_32_10_t state;
    curand_init(42, linear_index, 0, &state);
 
@@ -88,16 +88,16 @@ static __device__ void singleMode(double* rX, double* rY, double* rZ, const doub
    const real_t zetaX = real_t(2.0) * real_t(r1.w) - real_t(1.0);
    const real_t zetaY = real_t(2.0) * real_t(r2.x) - real_t(1.0);
    const real_t zetaZ = real_t(2.0) * real_t(r2.y) - real_t(1.0);
-   
-/*
+*/  
+
    // For debugging
    const real_t a     = 0.1; // math::realRandom(real_c(0.0), real_c(1.0));
-   const real_t phi   = 0.2; //math::realRandom(real_c(0.0), real_c(1.0)) * pi2;
-   const real_t psi   = 0.3; //math::realRandom(real_c(0.0), real_c(1.0)) * pi2;
+   const real_t phi   = 0.2 * pi2; //math::realRandom(real_c(0.0), real_c(1.0)) * pi2;
+   const real_t psi   = 0.3 * pi2; //math::realRandom(real_c(0.0), real_c(1.0)) * pi2;
    const real_t zetaX = -0.4; //math::realRandom(real_c(-1.0), real_c(1.0));
    const real_t zetaY = -0.6; //math::realRandom(real_c(-1.0), real_c(1.0));
    const real_t zetaZ = 0.8; // math::realRandom(real_c(-1.0), real_c(1.0));
-*/
+
 
    const real_t theta = acos(real_t(2.0) * a - real_t(1.0));
 
@@ -248,6 +248,8 @@ void updateVelocityPlane(gpu::GPUField<real_t>* velocityGPU, real_t* kappa, real
                          const real_t dx, const real_t t, const uint_t n, const real_t latticeVelocity, const int64_t nGhostLayers,
                          const Vector3<uint32_t>& block, const Vector3<uint32_t>& grid){
 
+   WALBERLA_ABORT("Turbulent Inflow not usuable at the moment")
+
    dim3 block_(block[0], block[1], block[2]);
    dim3 grid_(grid[0], grid[1], grid[2]);
 
diff --git a/src/lbm_generated/CMakeLists.txt b/src/lbm_generated/CMakeLists.txt
index 2513a58f2e646025fa86107409058a7576a3f62f..ce6473eb048068a711d6680bd0d1a889404f35e6 100644
--- a/src/lbm_generated/CMakeLists.txt
+++ b/src/lbm_generated/CMakeLists.txt
@@ -15,6 +15,11 @@ target_link_libraries( lbm_generated
         vtk
         )
 
+target_sources( lbm_generated
+      PRIVATE
+      util.h
+      )
+
 add_subdirectory( boundary )
 add_subdirectory( communication )
 add_subdirectory( gpu )
diff --git a/src/lbm_generated/util.h b/src/lbm_generated/util.h
new file mode 100644
index 0000000000000000000000000000000000000000..609377c8ea6dc1b9705624b141e8135b5714746f
--- /dev/null
+++ b/src/lbm_generated/util.h
@@ -0,0 +1,40 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file util.h
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "core/DataTypes.h"
+#include "core/math/Vector3.h"
+
+namespace walberla::lbm_generated
+{
+
+inline real_t viscosityFromOmega( const real_t omega )
+{
+   static const real_t one_third = real_c(1.0) / real_c(3.0);
+   return ( real_c(1.0) / omega - real_c(0.5) ) * one_third;
+}
+
+inline real_t omegaFromViscosity( const real_t viscosity )
+{
+   return real_c(1.0) / ( real_c(0.5) + real_c(3.0) * viscosity );
+}
+
+} // namespace walberla::lbm_generated
\ No newline at end of file