diff --git a/apps/showcases/CMakeLists.txt b/apps/showcases/CMakeLists.txt
index 6f14223a16c0634db9e84e9e4ed946aa04fde4e2..77dcbf23f65d491852008bd33765acda015d8c95 100644
--- a/apps/showcases/CMakeLists.txt
+++ b/apps/showcases/CMakeLists.txt
@@ -11,6 +11,7 @@ if ( WALBERLA_BUILD_WITH_CODEGEN)
 
    add_subdirectory( Antidunes )
    add_subdirectory( FlowAroundSphere )
+   add_subdirectory( FlowAroundSphereCPU )
    add_subdirectory( FlowAroundCylinder )
    add_subdirectory( Channel )
    add_subdirectory( TaylorGreenVortex )
diff --git a/apps/showcases/FlowAroundSphereCPU/CMakeLists.txt b/apps/showcases/FlowAroundSphereCPU/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0766549e0f4c6b7e20eb70139daa309085444581
--- /dev/null
+++ b/apps/showcases/FlowAroundSphereCPU/CMakeLists.txt
@@ -0,0 +1,24 @@
+waLBerla_link_files_to_builddir( *.py )
+waLBerla_link_files_to_builddir( *.prm )
+waLBerla_link_files_to_builddir( *.png )
+waLBerla_link_files_to_builddir( *.obj )
+waLBerla_link_files_to_builddir( *.stl )
+waLBerla_link_files_to_builddir( *.mtl )
+
+waLBerla_generate_target_from_python(NAME FlowAroundSphereCPUCodeGen
+        FILE FlowAroundSphereCPU.py
+        OUT_FILES FlowAroundSphereCPUSweepCollection.h FlowAroundSphereCPUSweepCollection.cpp
+        FlowAroundSphereCPUStorageSpecification.h FlowAroundSphereCPUStorageSpecification.cpp
+        FreeSlip.h FreeSlip.cpp
+        NoSlip.h NoSlip.cpp
+        Obstacle.h Obstacle.cpp
+        UBB.h UBB.cpp
+        Outflow.h Outflow.cpp
+        FixedDensity.h FixedDensity.cpp
+        FlowAroundSphereCPUBoundaryCollection.h
+        FlowAroundSphereInfoHeader.h
+        FlowAroundSphereStaticDefines.h)
+
+waLBerla_add_executable ( NAME FlowAroundSphereCPU
+        FILES FlowAroundSphereCPU.cpp Sphere.cpp Evaluation.cpp GridGeneration.h Types.h
+        DEPENDS blockforest boundary core field lbm_generated stencil timeloop vtk FlowAroundSphereCPUCodeGen )
diff --git a/apps/showcases/FlowAroundSphereCPU/Evaluation.cpp b/apps/showcases/FlowAroundSphereCPU/Evaluation.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9145b7fe82614ae34ce9a7f7734aa18a036ce646
--- /dev/null
+++ b/apps/showcases/FlowAroundSphereCPU/Evaluation.cpp
@@ -0,0 +1,359 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file Evaluation.cpp
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+#include "Evaluation.h"
+
+
+namespace walberla
+{
+
+void Evaluation::operator()()
+{
+   if (checkFrequency_ == uint_t(0)) return;
+   ++coarseExecutionCounter_;
+   if (rampUpTime_ > coarseExecutionCounter_) return;
+
+   real_t pressureDifference_L(real_t(0));
+   real_t pressureDifference(real_t(0));
+
+   evaluate(pressureDifference_L, pressureDifference);
+
+   if ((coarseExecutionCounter_ - uint_c(1)) % checkFrequency_ != 0) return;
+
+   WALBERLA_CHECK_NOT_NULLPTR(blocks_)
+
+   // Strouhal number (needs vortex shedding frequency)
+
+   real_t vortexVelocity(real_t(0));
+
+   if (setup_.evaluateStrouhal){
+      auto block = blocks_->getBlock(setup_.pStrouhal);
+      if (block != nullptr){
+         const VelocityField_T* const velocityField = block->template getData< VelocityField_T >(ids_.velocityField);
+         const auto cell                            = blocks_->getBlockLocalCell(*block, setup_.pStrouhal);
+         WALBERLA_ASSERT(velocityField->xyzSize().contains(cell))
+         vortexVelocity += velocityField->get(cell.x(), cell.y(), cell.z(), cell_idx_c(0));
+      }
+      mpi::reduceInplace(vortexVelocity, mpi::SUM);
+   }
+
+   WALBERLA_ROOT_SECTION()
+   {
+      for(auto it = dragResults.begin(); it != dragResults.end(); ++it){
+         coefficients_[0].push_back(it->cDRealArea);
+         coefficients_[1].push_back(it->cLRealArea);
+         coefficients_[2].push_back(it->cDDiscreteArea);
+         coefficients_[3].push_back(it->cLDiscreteArea);
+      }
+
+      if (coefficients_[0].size() > setup_.nbrOfEvaluationPointsForCoefficientExtremas){
+         for (uint_t i = uint_t(0); i < uint_t(4); ++i)
+            coefficients_[i].pop_front();
+      }
+
+      for (uint_t i = uint_t(0); i < uint_t(4); ++i){
+         coefficientExtremas_[i] = std::make_pair(*(coefficients_[i].begin()), *(coefficients_[i].begin()));
+         for (auto v = coefficients_[i].begin(); v != coefficients_[i].end(); ++v){
+            coefficientExtremas_[i].first  = std::min(coefficientExtremas_[i].first, *v);
+            coefficientExtremas_[i].second = std::max(coefficientExtremas_[i].second, *v);
+         }
+      }
+
+      std::ostringstream oss;
+
+      if (logToStream_ and !dragResults.empty()){
+         WALBERLA_LOG_RESULT_ON_ROOT(
+            "force acting on sphere (in dimensionless lattice units of the coarsest grid - evaluated in time step "
+            << coarseExecutionCounter_ - uint_c(1) << "):\n   " << force_ << oss.str()
+            << "\ndrag and lift coefficients (including extrema of last " << (coefficients_[0].size() * checkFrequency_)
+            << " time steps):"
+               "\n   \"real\" area:"
+               "\n      c_D: "
+            << dragResults.back().cDRealArea << " (min = " << coefficientExtremas_[0].first << ", max = " << coefficientExtremas_[0].second
+            << ")"
+            << "\n      c_L: " << dragResults.back().cLRealArea << " (min = " << coefficientExtremas_[1].first
+            << ", max = " << coefficientExtremas_[1].second << ")"
+            << "\n   discrete area:"
+               "\n      c_D: "
+            << dragResults.back().cDDiscreteArea << " (min = " << coefficientExtremas_[2].first
+            << ", max = " << coefficientExtremas_[2].second << ")"
+            << "\n      c_L: " << dragResults.back().cLDiscreteArea << " (min = " << coefficientExtremas_[3].first
+            << ", max = " << coefficientExtremas_[3].second << ")")
+      }
+
+      if (setup_.evaluatePressure && logToStream_){
+         WALBERLA_LOG_RESULT_ON_ROOT("pressure:\n   difference: " << pressureDifference << " Pa ("<< pressureDifference_L << ")")
+      }
+
+      if (setup_.evaluateStrouhal)
+      {
+         // We evaluate the derivative (-> strouhalRising_) in order to find the local minima and maxima of the velocity
+         // over time. If we know the time between a local minimum and a local maximum, we can calculate the frequency.
+         // -> "Smooth noise-robust differentiators"
+         // (http://www.holoborodko.com/pavel/numerical-methods/numerical-derivative/smooth-low-noise-differentiators/)
+
+         if (strouhalVelocities_.size() < uint_t(11)) { strouhalVelocities_.push_back(vortexVelocity); }
+         else{
+            for (uint_t i = uint_t(0); i < uint_t(10); ++i)
+               strouhalVelocities_[i] = strouhalVelocities_[i + 1];
+            strouhalVelocities_[10] = vortexVelocity;
+
+            const real_t f1 = strouhalVelocities_[6] - strouhalVelocities_[4];
+            const real_t f2 = strouhalVelocities_[7] - strouhalVelocities_[3];
+            const real_t f3 = strouhalVelocities_[8] - strouhalVelocities_[2];
+            const real_t f4 = strouhalVelocities_[9] - strouhalVelocities_[1];
+            const real_t f5 = strouhalVelocities_[10] - strouhalVelocities_[0];
+
+            const real_t diff =
+               (real_c(322) * f1 + real_c(256) * f2 + real_c(39) * f3 - real_c(32) * f4 - real_c(11) * f5) /
+               real_c(1536);
+
+            if ((diff > real_t(0)) != strouhalRising_){
+               strouhalRising_ = (diff > real_t(0));
+
+               if (strouhalTimeStep_.size() < uint_t(3)) { strouhalTimeStep_.push_back(coarseExecutionCounter_); }
+               else{
+                  strouhalTimeStep_[0] = strouhalTimeStep_[1];
+                  strouhalTimeStep_[1] = strouhalTimeStep_[2];
+                  strouhalTimeStep_[2] = coarseExecutionCounter_;
+               }
+            }
+         }
+
+         if (strouhalTimeStep_.size() == uint_t(3)){
+            strouhalNumberRealD_     = diameterSphere / (meanVelocity * real_c(strouhalTimeStep_[2] - strouhalTimeStep_[0]));
+            strouhalEvaluationExecutionCount_ = coarseExecutionCounter_ - uint_t(1);
+
+            if (logToStream_){
+               WALBERLA_LOG_RESULT_ON_ROOT(
+                  "Strouhal number (evaluated in time step "
+                  << strouhalEvaluationExecutionCount_
+                  << "):"
+                     "\n   D/U (in lattice units): "
+                  << (diameterSphere / meanVelocity) << " (\"real\" D), "
+                  << "\n   T: "
+                  << (real_c(strouhalTimeStep_[2] - strouhalTimeStep_[0]) * setup_.dt) << " s ("
+                  << real_c(strouhalTimeStep_[2] - strouhalTimeStep_[0])
+                  << ")"
+                     "\n   f: "
+                  << (real_t(1) / (real_c(strouhalTimeStep_[2] - strouhalTimeStep_[0]) * setup_.dt)) << " Hz ("
+                  << (real_t(1) / real_c(strouhalTimeStep_[2] - strouhalTimeStep_[0]))
+                  << ")"
+                     "\n   St (\"real\" D):   "
+                  << strouhalNumberRealD_ << "\n")
+            }
+         }
+      }
+
+      std::ofstream outfile( dragFilename_.c_str(), std::ios_base::app );
+
+      for(auto it = dragResults.begin(); it != dragResults.end(); ++it){
+         outfile << it->timestep << ",";
+         outfile << it->Fx << "," << it->Fy << "," << it->Fz << ",";
+         outfile << it->cDRealArea << ",";
+         outfile << it->cLRealArea << ",";
+         outfile << it->cDDiscreteArea << ",";
+         outfile << it->cLDiscreteArea;
+         outfile << "\n";
+      }
+      outfile.close();
+
+      if (logToFile_ and !dragResults.empty()){
+         std::ofstream file(filename_.c_str(), std::ios_base::app);
+         file << coarseExecutionCounter_ - uint_t(1) << " " << force_[0] << " " << force_[1] << " " << force_[2] << " "
+              << dragResults.back().cDRealArea << " " << dragResults.back().cLRealArea << " " << dragResults.back().cDDiscreteArea << " " << dragResults.back().cLDiscreteArea << " "
+              << pressureDifference_L << " " << pressureDifference << " " << vortexVelocity << " "
+              << strouhalNumberRealD_ << '\n';
+         file.close();
+      }
+      dragResults.clear();
+   }
+
+   // WALBERLA_MPI_WORLD_BARRIER();
+}
+
+void Evaluation::resetForce()
+{
+   if (!initialized_) refresh();
+}
+
+void Evaluation::forceCalculation(const uint_t level)
+{
+   if (rampUpTime_ > coarseExecutionCounter_) return;
+
+   if(level == maxLevel_){
+      for (auto b : finestBlocks_){
+         force_ += Vector3<double>(boundaryCollection_.ObstacleObject->getForce(b));
+      }
+
+      mpi::reduceInplace(force_, mpi::SUM);
+      WALBERLA_ROOT_SECTION(){
+         const double meanU2 = double_c(meanVelocity) * double_c(meanVelocity);
+
+         const double cDRealArea = (double_c(8.0) * force_[0]) / (meanU2 * double_c(surfaceAreaSphere));
+         const double cLRealArea = (double_c(8.0) * force_[1]) / (meanU2 * double_c(surfaceAreaSphere));
+         const double cDDiscreteArea = (double_c(8.0) * force_[0]) / (meanU2 * double_c(AD_));
+         const double cLDiscreteArea = (double_c(8.0) * force_[1]) / (meanU2 * double_c(AL_));
+
+         DragCoefficient DC(fineExecutionCounter_, force_, cDRealArea, cLRealArea, cDDiscreteArea, cLDiscreteArea);
+         dragResults.push_back(DC);
+
+         fineExecutionCounter_++;
+      }
+   }
+
+   force_[0] = double_c(0.0);
+   force_[1] = double_c(0.0);
+   force_[2] = double_c(0.0);
+}
+
+void Evaluation::refresh()
+{
+   WALBERLA_CHECK_NOT_NULLPTR(blocks_)
+   const uint_t finestLevel = blocks_->getDepth();
+
+   real_t AD(real_t(0));
+   real_t AL(real_t(0));
+   for (auto block = blocks_->begin(); block != blocks_->end(); ++block){
+      const uint_t blockLevel = blocks_->getLevel(*block);
+      const FlagField_T* const flagField = block->template getData< FlagField_T >(ids_.flagField);
+
+      auto fluid    = flagField->getFlag(setup_.fluidUID);
+      auto obstacle = flagField->getFlag(setup_.obstacleUID);
+      const real_t area  = real_c(4.0);
+
+      auto xyzSize = flagField->xyzSize();
+      for (auto z = xyzSize.zMin(); z <= xyzSize.zMax(); ++z){
+         for (auto y = xyzSize.yMin(); y <= xyzSize.yMax(); ++y){
+            for (auto x = xyzSize.xMin(); x <= xyzSize.xMax(); ++x){
+               if (flagField->isFlagSet(x, y, z, fluid)){
+                  for (auto it = Stencil_T::beginNoCenter(); it != Stencil_T::end(); ++it){
+                     auto nx = x + cell_idx_c(it.cx());
+                     auto ny = y + cell_idx_c(it.cy());
+                     auto nz = z + cell_idx_c(it.cz());
+
+                     if (flagField->isFlagSet(nx, ny, nz, obstacle)){
+                        WALBERLA_CHECK(blockLevel == finestLevel, "The sphere must be completely located on the finest level")
+                        if (it.cx() == 1 && it.cy() == 0 && it.cz() == 0) { AD += area; }
+                        else if (it.cx() == 0 && it.cz() == 0){
+                           if (it.cy() == 1){
+                              AL += area;
+                           }
+                        }
+                     }
+                  }
+               }
+            }
+         }
+      }
+   }
+   mpi::reduceInplace(AD, mpi::SUM);
+   mpi::reduceInplace(AL, mpi::SUM);
+
+   WALBERLA_ROOT_SECTION(){
+      AD_ = AD;
+      AL_ = AL;
+   }
+
+   // Check if points alpha and omega (required for evaluating the pressure difference) are located in fluid cells
+   if (setup_.evaluatePressure){
+      auto block = blocks_->getBlock(setup_.pAlpha);
+      if (block != nullptr){
+         const FlagField_T* const flagField = block->template getData< FlagField_T >(ids_.flagField);
+
+         const auto cell = blocks_->getBlockLocalCell(*block, setup_.pAlpha);
+         WALBERLA_ASSERT(flagField->xyzSize().contains(cell))
+
+         const auto fluid = flagField->getFlag(setup_.fluidUID);
+         if (!flagField->isFlagSet(cell, fluid)){
+            WALBERLA_ABORT("Cell for evaluating pressure difference at point alpha " << setup_.pAlpha << " is not a fluid cell!")
+         }
+      }
+
+      block = blocks_->getBlock(setup_.pOmega);
+      if (block != nullptr){
+         const FlagField_T* const flagField = block->template getData< FlagField_T >(ids_.flagField);
+
+         const auto cell = blocks_->getBlockLocalCell(*block, setup_.pOmega);
+         WALBERLA_ASSERT(flagField->xyzSize().contains(cell))
+
+         const auto fluid = flagField->getFlag(setup_.fluidUID);
+         if (!flagField->isFlagSet(cell, fluid)){
+            WALBERLA_ABORT("Cell for evaluating pressure difference at point omega " << setup_.pOmega << " is not a fluid cell!")
+         }
+      }
+   }
+
+   // Check if point for evaluating the Strouhal number is located inside a fluid cell
+
+   if (setup_.evaluateStrouhal){
+      auto block = blocks_->getBlock(setup_.pStrouhal);
+      if (block != nullptr){
+         const FlagField_T* const flagField = block->template getData< FlagField_T >(ids_.flagField);
+
+         const auto cell = blocks_->getBlockLocalCell(*block, setup_.pStrouhal);
+         WALBERLA_ASSERT(flagField->xyzSize().contains(cell))
+
+         const auto fluid = flagField->getFlag(setup_.fluidUID);
+
+         if (!flagField->isFlagSet(cell, fluid))
+            WALBERLA_ABORT("Cell for evaluating the Strouhal number at point " << setup_.pStrouhal << " is not a fluid cell!")
+      }
+   }
+
+   initialized_ = true;
+}
+
+void Evaluation::evaluate(real_t& pressureDifference_L, real_t& pressureDifference)
+{
+   if (!initialized_) refresh();
+
+   real_t pAlpha(real_t(0));
+   real_t pOmega(real_t(0));
+
+   WALBERLA_CHECK_NOT_NULLPTR(blocks_)
+
+   if (setup_.evaluatePressure){
+      auto block = blocks_->getBlock(setup_.pAlpha);
+      if (block != nullptr){
+         const ScalarField_T* const densityField = block->template getData< ScalarField_T >(ids_.densityField);
+         const auto cell                         = blocks_->getBlockLocalCell(*block, setup_.pAlpha);
+         WALBERLA_ASSERT(densityField->xyzSize().contains(cell))
+         pAlpha += densityField->get(cell) / real_c(3);
+      }
+
+      block = blocks_->getBlock(setup_.pOmega);
+      if (block != nullptr){
+         const ScalarField_T* const densityField = block->template getData< ScalarField_T >(ids_.densityField);
+         const auto cell                         = blocks_->getBlockLocalCell(*block, setup_.pOmega);
+         WALBERLA_ASSERT(densityField->xyzSize().contains(cell))
+         pOmega += densityField->get(cell) / real_c(3);
+      }
+
+      mpi::reduceInplace(pAlpha, mpi::SUM);
+      mpi::reduceInplace(pOmega, mpi::SUM);
+   }
+
+   WALBERLA_ROOT_SECTION(){
+      pressureDifference_L = pAlpha - pOmega;
+      pressureDifference   = (pressureDifference_L * setup_.rho * setup_.dxC * setup_.dxC) / (setup_.dt * setup_.dt);
+   }
+}
+
+}
\ No newline at end of file
diff --git a/apps/showcases/FlowAroundSphereCPU/Evaluation.h b/apps/showcases/FlowAroundSphereCPU/Evaluation.h
new file mode 100644
index 0000000000000000000000000000000000000000..576cba295b7eb54d01a526391a66c743b7ca8d48
--- /dev/null
+++ b/apps/showcases/FlowAroundSphereCPU/Evaluation.h
@@ -0,0 +1,201 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file Evaluation.cpp
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+# pragma once
+
+#include "core/config/Config.h"
+#include "core/math/Sample.h"
+#include "core/math/Constants.h"
+
+#include "lbm_generated/field/PdfField.h"
+
+#include "sqlite/SQLite.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstdlib>
+#include <cstring>
+#include <functional>
+#include <iostream>
+#include <memory>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "Types.h"
+#include "Setup.h"
+#include "FlowAroundSphereInfoHeader.h"
+using namespace walberla;
+
+using StorageSpecification_T = lbm::FlowAroundSphereCPUStorageSpecification;
+using Stencil_T              = StorageSpecification_T::Stencil;
+using PdfField_T             = lbm_generated::PdfField< StorageSpecification_T >;
+using FlagField_T            = FlagField< uint8_t >;
+using BoundaryCollection_T = lbm::FlowAroundSphereCPUBoundaryCollection< FlagField_T >;
+
+using VoidFunction = std::function<void ()>;
+
+namespace walberla
+{
+
+struct DragCoefficient {
+   uint_t timestep;
+   double Fx;
+   double Fy;
+   double Fz;
+   double cDRealArea;
+   double cLRealArea;
+   double cDDiscreteArea;
+   double cLDiscreteArea;
+   DragCoefficient(uint_t t, Vector3<double> f, double cdR, double clR, double cdD, double clD) : timestep(t), Fx(f[0]), Fy(f[1]), Fz(f[2]), cDRealArea(cdR), cLRealArea(clR), cDDiscreteArea(cdD), cLDiscreteArea(clD) {}
+};
+
+class Evaluation
+{
+ public:
+   Evaluation(std::shared_ptr< StructuredBlockForest >& blocks, const uint_t checkFrequency, const uint_t rampUpTime,
+              BoundaryCollection_T & boundaryCollection,
+              const IDs& ids, const Setup& setup,
+              const bool logToStream = true, const bool logToFile = true,
+              const std::string& filename = std::string("FlowAroundSphere.txt"))
+      : blocks_(blocks), checkFrequency_(checkFrequency), rampUpTime_(rampUpTime),
+        boundaryCollection_(boundaryCollection), ids_(ids), setup_(setup),
+        logToStream_(logToStream), logToFile_(logToFile), filename_(filename)
+   {
+      WALBERLA_ASSERT_NOT_NULLPTR(blocks)
+      maxLevel_ = blocks->getDepth();
+      blocks->getBlocks(finestBlocks_, maxLevel_);
+
+      coefficients_.resize(uint_c(4));
+      coefficientExtremas_.resize(uint_c(4));
+
+      const real_t factor = setup_.dxC / setup_.dxF;
+      diameterSphere      = real_c(2.0) * (setup_.sphereRadius * factor);
+      surfaceAreaSphere   = math::pi * diameterSphere * diameterSphere;
+      meanVelocity        = setup_.inflowVelocity; // (real_c(4.0) * setup_.inflowVelocity) / real_c(9.0);
+
+      dragFilename_ = std::string("dragSphereRe_") + std::to_string(uint_t(setup_.reynoldsNumber)) + std::string("_meshLevels_") +  std::to_string(uint_t(setup_.refinementLevels + 1)) + std::string(".csv");
+
+      WALBERLA_ROOT_SECTION()
+      {
+         filesystem::path dataFile( dragFilename_.c_str() );
+         if( filesystem::exists( dataFile ) )
+            std::remove( dataFile.string().c_str() );
+
+         std::ofstream outfile( dragFilename_.c_str() );
+         outfile << "SEP=," << "\n";
+
+         setup_.writeParameterHeader(outfile);
+
+         outfile << "timestep," << "Fx," << "Fy," << "Fz," << "cDRealArea," << "cLRealArea," << "cDDiscreteArea," << "cLDiscreteArea" << "\n";
+         outfile.close();
+
+         if (logToFile_)
+         {
+            std::ofstream file(filename_.c_str());
+            file << "# time step [1], force (x) [2], force (y) [3], force (z) [4], "
+                    "cD [5], cL [6], "
+                    "pressure difference (in lattice units) [7], pressure difference (in Pa) [8], vortex velocity (in "
+                    "lattice units) [9], "
+                    "Strouhal number (real D) [10]"
+                 << '\n';
+            if (!setup_.evaluatePressure)
+               file << "# ATTENTION: pressure was not evaluated, pressure difference is set to zero!" << '\n';
+            if (!setup_.evaluateStrouhal)
+               file << "# ATTENTION: vortex velocities were not evaluated, Strouhal number is set to zero!"
+                    << '\n';
+            file.close();
+         }
+      }
+
+      refresh();
+
+      WALBERLA_LOG_INFO_ON_ROOT(
+         "Evaluation initialised:"
+            "\n   + Sphere real area:                      " << surfaceAreaSphere
+         << "\n   + Sphere real diameter:                  " << diameterSphere
+         << "\n   + Sphere discrete area drag coefficient: " << AD_
+         << "\n   + Sphere discrete area lift coefficient: " << AL_
+      )
+
+   }
+
+   void operator()();
+   void forceCalculation(const uint_t level); // for calculating the force
+   void resetForce();
+
+   std::function<void (const uint_t)> forceCalculationFunctor()
+   {
+      return [this](uint_t level) { forceCalculation(level); };
+   }
+
+   std::function<void()> resetForceFunctor()
+   {
+      return [this]() { resetForce(); };
+   }
+
+   void refresh();
+
+ protected:
+   void evaluate(real_t& pressureDifference_L, real_t& pressureDifference);
+
+   bool initialized_{false};
+
+   std::shared_ptr< StructuredBlockForest > blocks_;
+   uint_t maxLevel_;
+   std::vector<Block *> finestBlocks_;
+
+   uint_t coarseExecutionCounter_{ uint_c(0) };
+   uint_t fineExecutionCounter_{ uint_c(0) };
+   uint_t checkFrequency_;
+   uint_t rampUpTime_;
+
+   BoundaryCollection_T & boundaryCollection_;
+
+   IDs ids_;
+   Setup setup_;
+
+   real_t diameterSphere;
+   real_t surfaceAreaSphere;
+   real_t meanVelocity;
+
+   Vector3< double > force_;
+   real_t AD_;
+   real_t AL_;
+   std::vector<DragCoefficient> dragResults;
+
+   std::vector< std::deque< real_t > > coefficients_;
+   std::vector< std::pair< real_t, real_t > > coefficientExtremas_;
+
+   std::vector< real_t > strouhalVelocities_;
+   std::vector< uint_t > strouhalTimeStep_;
+   bool strouhalRising_{false};
+   real_t strouhalNumberRealD_ { real_c(0.0) };
+   uint_t strouhalEvaluationExecutionCount_ { uint_c(0) };
+
+   bool logToStream_;
+   bool logToFile_;
+   std::string filename_;
+   std::string dragFilename_;
+
+   std::map< std::string, double > sqlResults_;
+
+}; // class Evaluation
+
+}
\ No newline at end of file
diff --git a/apps/showcases/FlowAroundSphereCPU/FlowAroundSphereCPU.cpp b/apps/showcases/FlowAroundSphereCPU/FlowAroundSphereCPU.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b7969304911c77b0584c9f1eb9d434e82ff8a815
--- /dev/null
+++ b/apps/showcases/FlowAroundSphereCPU/FlowAroundSphereCPU.cpp
@@ -0,0 +1,450 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file FlowAroundSphere.cpp
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#include "blockforest/AABBRefinementSelection.h"
+#include "blockforest/SetupBlockForest.h"
+#include "blockforest/StructuredBlockForest.h"
+#include "blockforest/communication/NonUniformBufferedScheme.h"
+#include "blockforest/loadbalancing/StaticCurve.h"
+
+#include "core/Abort.h"
+#include "core/DataTypes.h"
+#include "core/SharedFunctor.h"
+#include "core/debug/CheckFunctions.h"
+#include "core/logging/Logging.h"
+#include "core/logging/Initialization.h"
+#include "core/math/Vector3.h"
+#include "core/mpi/Environment.h"
+#include "core/mpi/MPIManager.h"
+#include "core/MemoryUsage.h"
+#include "core/mpi/Reduce.h"
+#include "core/timing/RemainingTimeLogger.h"
+#include "core/timing/TimingPool.h"
+
+#include "field/AddToStorage.h"
+#include "field/CellCounter.h"
+#include "field/FlagField.h"
+#include "field/StabilityChecker.h"
+#include "field/adaptors/AdaptorCreators.h"
+#include "field/iterators/FieldIterator.h"
+#include "field/vtk/VTKWriter.h"
+#include "field/vtk/FlagFieldCellFilter.h"
+
+#include "geometry/InitBoundaryHandling.h"
+
+#include "lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h"
+#include "lbm_generated/communication/UniformGeneratedPdfPackInfo.h"
+#include "lbm_generated/evaluation/PerformanceEvaluation.h"
+#include "lbm_generated/field/AddToStorage.h"
+#include "lbm_generated/field/PdfField.h"
+#include "lbm_generated/refinement/BasicRecursiveTimeStep.h"
+#include "lbm_generated/refinement/RefinementScaling.h"
+
+#include "timeloop/SweepTimeloop.h"
+#include "vtk/VTKOutput.h"
+
+#include <cstdlib>
+#include <iostream>
+#include <memory>
+#include <vector>
+
+#include "Evaluation.h"
+#include "GridGeneration.h"
+#include "Setup.h"
+#include "Sphere.h"
+#include "Types.h"
+
+#include "FlowAroundSphereStaticDefines.h"
+
+using namespace walberla;
+
+using BoundaryCollection_T = lbm::FlowAroundSphereCPUBoundaryCollection< FlagField_T >;
+using SweepCollection_T = lbm::FlowAroundSphereCPUSweepCollection;
+
+
+using PdfField_T = lbm_generated::PdfField< StorageSpecification_T >;
+using blockforest::communication::NonUniformBufferedScheme;
+using blockforest::communication::UniformBufferedScheme;
+
+using lbm_generated::NonuniformGeneratedPdfPackInfo;
+using lbm_generated::UniformGeneratedPdfPackInfo;
+
+int main(int argc, char** argv)
+{
+   mpi::Environment env( argc, argv );
+   mpi::MPIManager::instance()->useWorldComm();
+
+   shared_ptr< Config > config = make_shared< Config >();
+   config->readParameterFile( argv[1] );
+   logging::configureLogging(config);
+
+   ///////////////////////
+   /// PARAMETER INPUT ///
+   ///////////////////////
+
+   // read general simulation parameters
+   auto parameters      = config->getOneBlock("Parameters");
+   auto domainParameters= config->getOneBlock("DomainSetup");
+   auto boundariesConfig= config->getBlock("Boundaries");
+   Setup setup(parameters, domainParameters, boundariesConfig, infoMap);
+
+   auto loggingParameters         = config->getOneBlock("Logging");
+   bool writeSetupForestAndReturn              = loggingParameters.getParameter< bool >("writeSetupForestAndReturn", false);
+   if (uint_c(MPIManager::instance()->numProcesses()) > 1) writeSetupForestAndReturn = false;
+
+   WALBERLA_LOG_INFO_ON_ROOT("Diameter of the Sphere is resolved with " << setup.resolutionSphere << " lattice cells.")
+   Sphere Sphere( setup );
+
+   ///////////////////////////
+   /// CREATE BLOCK FOREST ///
+   ///////////////////////////
+
+   shared_ptr< BlockForest > bfs;
+   createBlockForest(bfs, setup);
+
+   if (writeSetupForestAndReturn && mpi::MPIManager::instance()->numProcesses() == 1)
+   {
+      WALBERLA_LOG_INFO_ON_ROOT("BlockForest has been created and writen to file. Returning program")
+
+      const uint_t nBlocks = bfs->getNumberOfBlocks();
+      const uint_t nCells = nBlocks * (setup.cellsPerBlock[0] * setup.cellsPerBlock[1] * setup.cellsPerBlock[2]);
+      const memory_t totalMemory = memory_t(nCells) * setup.memoryPerCell;
+
+      WALBERLA_LOG_INFO_ON_ROOT( "Benchmark run data:"
+                                "\n- simulation parameters:"
+                                "\n   + collision model:     " << infoMap["collisionOperator"] <<
+                                "\n   + stencil:             " << infoMap["stencil"] <<
+                                "\n   + streaming:           " << infoMap["streamingPattern"] <<
+                                "\n   + compressible:        " << ( StorageSpecification_T::compressible ? "yes" : "no" ) <<
+                                "\n   + mesh levels:         " << setup.refinementLevels + uint_c(1) <<
+                                "\n   + resolution:          " << setup.dxC << " - on the coarsest grid" <<
+                                "\n   + resolution:          " << setup.dxF << " - on the finest grid" <<
+                                "\n- domain properties:      "
+                                "\n   + # blocks:            " << nBlocks <<
+                                "\n   + # cells:             " << uint_c(real_c(nCells) / real_c(1e6)) << " [1e6]"
+                                "\n   + # cells sphere D:    " << setup.resolutionSphere <<
+                                "\n   + total memory:        " << totalMemory / 1e9 << " [GB]" <<
+                                "\n- simulation properties:  "
+                                "\n   + sphere pos.(x):      " << setup.sphereXPosition * setup.dxC << " [m]" <<
+                                "\n   + sphere pos.(y):      " << setup.sphereYPosition * setup.dxC << " [m]" <<
+                                "\n   + sphere pos.(z):      " << setup.sphereZPosition * setup.dxC << " [m]" <<
+                                "\n   + sphere radius:       " << setup.sphereRadius * setup.dxC << " [m]" <<
+                                "\n   + kin. viscosity:      " << setup.viscosity * setup.dxC * setup.dxC / setup.dt << " [m^2/s] (on the coarsest grid)" <<
+                                "\n   + viscosity LB:        " << setup.viscosity  << " [dx*dx/dt] (on the coarsest grid)" <<
+                                "\n   + omega:               " << setup.omega << " (on the coarsest grid)" <<
+                                "\n   + rho:                 " << setup.rho << " [kg/m^3]" <<
+                                "\n   + inflow velocity:     " << setup.referenceVelocity << " [m/s]" <<
+                                "\n   + lattice velocity:    " << setup.inflowVelocity << " [dx/dt]" <<
+                                "\n   + Reynolds number:     " << setup.reynoldsNumber <<
+                                "\n   + Mach number:         " << setup.machNumber <<
+                                "\n   + dx (coarsest grid):  " << setup.dxC << " [m]" <<
+                                "\n   + dt (coarsest grid):  " << setup.dt << " [s]"
+                                "\n   + #time steps:         " << setup.timesteps << " (on the coarsest grid, " << ( real_c(1.0) / setup.dt ) << " for 1s of real time)" <<
+                                "\n   + simulation time:     " << ( real_c( setup.timesteps ) * setup.dt ) << " [s]" )
+
+      logging::Logging::printFooterOnStream();
+      return EXIT_SUCCESS;
+   }
+
+   auto blocks = std::make_shared< StructuredBlockForest >(bfs, setup.cellsPerBlock[0], setup.cellsPerBlock[1], setup.cellsPerBlock[2]);
+   blocks->createCellBoundingBoxes();
+
+   ////////////////////////////////////
+   /// CREATE AND INITIALIZE FIELDS ///
+   ////////////////////////////////////
+
+   // create fields
+   const StorageSpecification_T StorageSpec = StorageSpecification_T();
+   IDs ids;
+
+   ids.pdfField = lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, uint_c(2), field::fzyx);
+   ids.velocityField = field::addToStorage< VelocityField_T >(blocks, "vel", real_c(0.0), field::fzyx, uint_c(2));
+   ids.densityField = field::addToStorage< ScalarField_T >(blocks, "density", real_c(1.0), field::fzyx, uint_c(2));
+   ids.omegaField = field::addToStorage< ScalarField_T >(blocks, "omega", real_c(0.0), field::fzyx, uint_c(2));
+   ids.flagField = field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field", uint_c(3));
+
+   auto spongeLayer     = config->getOneBlock("SpongeLayer");
+   const bool deactivateSpongeLayer = spongeLayer.getParameter< bool >("deactivateSpongeLayer");
+   const real_t targetOmega         = spongeLayer.getParameter< real_t >("targetOmega");
+   const real_t spongeStart         = spongeLayer.getParameter< real_t >("spongeStart");
+
+   const real_t omegaDiff = setup.omega - targetOmega;
+   const real_t spongeWidth = real_c(blocks->getDomain().xMax()) - spongeStart;
+   const real_t spongeMid = spongeStart + (spongeWidth / real_c(2.0));
+
+   if(!deactivateSpongeLayer)
+      WALBERLA_LOG_WARNING_ON_ROOT("Using a sponge layer at the Outlet. The sponge layer starts at " << spongeStart << " [m] and targets a relaxation rate of " << targetOmega << " at the outlet" )
+
+   for (auto& block : *blocks)
+   {
+      Block& b = dynamic_cast< Block& >(block);
+      const uint_t level = blocks->getLevel(b);
+
+      auto * omegaField = b.getData< ScalarField_T > ( ids.omegaField );
+      for( auto it = omegaField->beginWithGhostLayer(0); it != omegaField->end(); ++it ){
+         if(deactivateSpongeLayer){
+            omegaField->get(it.cell()) = lbm_generated::relaxationRateScaling(setup.omega, level);
+         }
+         else{
+            Cell globalCell;
+            blocks->transformBlockLocalToGlobalCell(globalCell, block, it.cell());
+            Vector3<real_t> cellCenter = blocks->getCellCenter(globalCell, level);
+            const real_t factor = real_c(0.5) + real_c(0.5) * std::tanh(real_c(2.0) * (cellCenter[0] - spongeMid) / spongeWidth);
+            omegaField->get(it.cell()) = lbm_generated::relaxationRateScaling(setup.omega - (factor * omegaDiff), level);
+         }
+      }
+   }
+
+   WALBERLA_MPI_BARRIER()
+
+   const Cell innerOuterSplit =
+      Cell(parameters.getParameter< Vector3< cell_idx_t > >("innerOuterSplit", Vector3< cell_idx_t >(1, 1, 1)));
+   SweepCollection_T sweepCollection(blocks, ids.pdfField, ids.omegaField, ids.densityField, ids.velocityField, innerOuterSplit);
+
+   WALBERLA_LOG_INFO_ON_ROOT("Setting up communication...")
+   auto nonUniformCommunication = std::make_shared< NonUniformBufferedScheme< CommunicationStencil_T > >(blocks);
+   auto nonUniformPackInfo      = lbm_generated::setupNonuniformPdfCommunication< PdfField_T >(blocks, ids.pdfField);
+   nonUniformCommunication->addPackInfo(nonUniformPackInfo);
+
+   WALBERLA_MPI_BARRIER()
+   WALBERLA_LOG_INFO_ON_ROOT("Setting up communication done")
+
+   /////////////////////////
+   /// BOUNDARY HANDLING ///
+   /////////////////////////
+   WALBERLA_LOG_INFO_ON_ROOT("Start BOUNDARY HANDLING")
+   // create and initialize boundary handling
+   Sphere.setupBoundary(blocks, ids.flagField);
+   geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, ids.flagField, setup.fluidUID, cell_idx_c(0));
+
+   if(parameters.getParameter< bool >("initialiseWithInletVelocity"))
+   {
+      for (auto& block : *blocks)
+      {
+         auto * flagField = block.getData< FlagField_T > ( ids.flagField );
+         auto * velField = block.getData< VelocityField_T > ( ids.velocityField );
+         // auto domainFlag = flagField->getFlag(fluidFlagUID);
+
+         for( auto it = flagField->beginWithGhostLayer(2); it != flagField->end(); ++it )
+         {
+            // if (!isFlagSet(it, domainFlag))
+            //  continue;
+
+            velField->get(it.cell(), 0) = setup.inflowVelocity;
+         }
+      }
+   }
+
+   for (auto& block : *blocks)
+   {
+      sweepCollection.initialise(&block, cell_idx_c(1));
+   }
+
+   std::function< real_t(const Cell&, const Cell&, const shared_ptr< StructuredBlockForest >&, IBlock&) >
+      wallDistanceFunctor = wallDistance(Sphere);
+
+   const real_t omegaFinestLevel = lbm_generated::relaxationRateScaling(setup.omega, setup.refinementLevels);
+   BoundaryCollection_T boundaryCollection(blocks, ids.flagField, ids.pdfField, setup.fluidUID, omegaFinestLevel, setup.inflowVelocity, wallDistanceFunctor);
+
+   WALBERLA_MPI_BARRIER()
+   WALBERLA_LOG_INFO_ON_ROOT("BOUNDARY HANDLING done")
+
+   //////////////////////////////////
+   /// SET UP SWEEPS AND TIMELOOP ///
+   //////////////////////////////////
+   WALBERLA_LOG_INFO_ON_ROOT("Start SWEEPS AND TIMELOOP")
+   // flow evaluation
+   auto EvaluationParameters      = config->getOneBlock("Evaluation");
+   const uint_t evaluationCheckFrequency       = EvaluationParameters.getParameter< uint_t >("evaluationCheckFrequency");
+   const uint_t rampUpTime                     = EvaluationParameters.getParameter< uint_t >("rampUpTime");
+   const bool evaluationLogToStream            = EvaluationParameters.getParameter< bool >("logToStream");
+   const bool evaluationLogToFile              = EvaluationParameters.getParameter< bool >("logToFile");
+   const std::string evaluationFilename        = EvaluationParameters.getParameter< std::string >("filename");
+
+   shared_ptr< Evaluation > evaluation( new Evaluation( blocks, evaluationCheckFrequency, rampUpTime, boundaryCollection,
+                                                        ids, setup, evaluationLogToStream, evaluationLogToFile, evaluationFilename));
+
+   // create time loop
+   SweepTimeloop timeloop(blocks->getBlockStorage(), setup.timesteps);
+   std::shared_ptr< lbm_generated::BasicRecursiveTimeStep< PdfField_T, SweepCollection_T, BoundaryCollection_T > >
+      LBMRefinement;
+
+   LBMRefinement = std::make_shared<
+   lbm_generated::BasicRecursiveTimeStep< PdfField_T, SweepCollection_T, BoundaryCollection_T > >(
+   blocks, ids.pdfField, sweepCollection, boundaryCollection, nonUniformCommunication, nonUniformPackInfo);
+   LBMRefinement->addPostBoundaryHandlingBlockFunction(evaluation->forceCalculationFunctor());
+   LBMRefinement->addRefinementToTimeLoop(timeloop);
+   //////////////////
+   /// VTK OUTPUT ///
+   //////////////////
+   WALBERLA_LOG_INFO_ON_ROOT("SWEEPS AND TIMELOOP done")
+
+   auto VTKWriter           = config->getOneBlock("VTKWriter");
+   const uint_t vtkWriteFrequency       = VTKWriter.getParameter< uint_t >("vtkWriteFrequency", 0);
+   const uint_t vtkGhostLayers          = VTKWriter.getParameter< uint_t >("vtkGhostLayers", 0);
+   const bool amrFileFormat             = VTKWriter.getParameter< bool >("amrFileFormat", false);
+   const bool oneFilePerProcess         = VTKWriter.getParameter< bool >("oneFilePerProcess", false);
+   const real_t samplingResolution      = VTKWriter.getParameter< real_t >("samplingResolution", real_c(-1.0));
+   const uint_t initialWriteCallsToSkip = VTKWriter.getParameter< uint_t >("initialWriteCallsToSkip", uint_c(0.0));
+
+   auto finalDomain = blocks->getDomain();
+   if (vtkWriteFrequency > 0)
+   {
+      auto vtkOutput =
+         vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, vtkGhostLayers, false, "vtk_FlowAroundSphere",
+                                        "simulation_step", false, true, true, false, 0, amrFileFormat, oneFilePerProcess);
+
+      vtkOutput->setInitialWriteCallsToSkip(initialWriteCallsToSkip);
+
+      vtkOutput->addBeforeFunction([&]() {
+         for (auto& block : *blocks){
+            sweepCollection.calculateMacroscopicParameters(&block);
+         }
+      });
+
+      vtkOutput->setSamplingResolution(samplingResolution);
+
+      field::FlagFieldCellFilter<FlagField_T> fluidFilter( ids.flagField );
+      fluidFilter.addFlag( setup.obstacleUID );
+      vtkOutput->addCellExclusionFilter(fluidFilter);
+
+
+      if (VTKWriter.getParameter< bool >("writeOnlySlice", true)){
+         const AABB sliceXY(finalDomain.xMin(), finalDomain.yMin(), finalDomain.center()[2] - setup.dxF,
+                            finalDomain.xMax(), finalDomain.yMax(), finalDomain.center()[2] + setup.dxF);
+         vtkOutput->addCellInclusionFilter(vtk::AABBCellFilter(sliceXY));
+
+         if (VTKWriter.getParameter< bool >("writeXZSlice", false)){
+            const AABB sliceXZ(finalDomain.xMin(), finalDomain.center()[1] - setup.dxF, finalDomain.zMin(),
+                               finalDomain.xMax(), finalDomain.center()[1] + setup.dxF, finalDomain.zMax());
+            vtkOutput->addCellInclusionFilter(vtk::AABBCellFilter(sliceXZ));
+         }
+      }
+
+      if (VTKWriter.getParameter< bool >("velocity"))
+      {
+         auto velWriter = make_shared< field::VTKWriter< VelocityField_T, float32 > >(ids.velocityField, "velocity");
+         vtkOutput->addCellDataWriter(velWriter);
+      }
+      if (VTKWriter.getParameter< bool >("density"))
+      {
+         auto densityWriter = make_shared< field::VTKWriter< ScalarField_T, float32 > >(ids.densityField, "density");
+         vtkOutput->addCellDataWriter(densityWriter);
+      }
+      if (VTKWriter.getParameter< bool >("omega"))
+      {
+         auto omegaWriter = make_shared< field::VTKWriter< ScalarField_T, float32 > >(ids.omegaField, "omega");
+         vtkOutput->addCellDataWriter(omegaWriter);
+      }
+      if (VTKWriter.getParameter< bool >("flag"))
+      {
+         auto flagWriter = make_shared< field::VTKWriter< FlagField_T > >(ids.flagField, "flag");
+         vtkOutput->addCellDataWriter(flagWriter);
+      }
+      timeloop.addFuncAfterTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
+   }
+
+   // log remaining time
+   const real_t remainingTimeLoggerFrequency =
+      loggingParameters.getParameter< real_t >("remainingTimeLoggerFrequency", 3.0); // in seconds
+   if (uint_c(remainingTimeLoggerFrequency) > 0)
+   {
+      timeloop.addFuncAfterTimeStep(
+         timing::RemainingTimeLogger(timeloop.getNrOfTimeSteps(), remainingTimeLoggerFrequency),
+         "remaining time logger");
+   }
+
+   // LBM stability check
+   auto CheckerParameters      = config->getOneBlock("StabilityChecker");
+   const uint_t checkFrequency = CheckerParameters.getParameter< uint_t >("checkFrequency", uint_t(0));
+   if (checkFrequency > 0)
+   {
+      auto checkFunction = [](PdfField_T::value_type value) {  return value < math::abs(PdfField_T::value_type(10)); };
+      timeloop.addFuncAfterTimeStep(
+         makeSharedFunctor(field::makeStabilityChecker< PdfField_T, FlagField_T >(
+            config, blocks, ids.pdfField, ids.flagField, setup.fluidUID, checkFunction)),
+         "Stability check");
+   }
+
+   timeloop.addFuncBeforeTimeStep( SharedFunctor< Evaluation >(evaluation), "evaluation" );
+
+   WALBERLA_MPI_BARRIER()
+   // WALBERLA_LOG_INFO_ON_ROOT("Execute single timestep to fully complete the preprocessing done")
+
+   //////////////////////
+   /// RUN SIMULATION ///
+   //////////////////////
+   const lbm_generated::PerformanceEvaluation< FlagField_T > performance(blocks, ids.flagField, setup.fluidUID);
+   field::CellCounter< FlagField_T > fluidCells(blocks, ids.flagField, setup.fluidUID);
+   fluidCells();
+
+   WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << blocks->getNumberOfBlocks())
+   for (uint_t level = 0; level <= setup.refinementLevels; level++)
+   {
+      WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << blocks->getNumberOfBlocks(level))
+   }
+
+   WALBERLA_LOG_INFO_ON_ROOT( "Benchmark run data:"
+                             "\n- simulation parameters:"
+                             "\n   + collision model:     " << infoMap["collisionOperator"] <<
+                             "\n   + stencil:             " << infoMap["stencil"] <<
+                             "\n   + streaming:           " << infoMap["streamingPattern"] <<
+                             "\n   + compressible:        " << ( StorageSpecification_T::compressible ? "yes" : "no" ) <<
+                             "\n   + mesh levels:         " << setup.refinementLevels + uint_c(1) <<
+                             "\n   + resolution:          " << setup.dxC << " - on the coarsest grid" <<
+                             "\n   + resolution:          " << setup.dxF << " - on the finest grid" <<
+                             "\n- simulation properties:  "
+                             "\n   + sphere pos.(x):      " << setup.sphereXPosition * setup.dxC << " [m]" <<
+                             "\n   + sphere pos.(y):      " << setup.sphereYPosition * setup.dxC << " [m]" <<
+                             "\n   + sphere pos.(z):      " << setup.sphereZPosition * setup.dxC << " [m]" <<
+                             "\n   + sphere radius:       " << setup.sphereRadius * setup.dxC << " [m]" <<
+                             "\n   + kin. viscosity:      " << setup.viscosity * setup.dxC * setup.dxC / setup.dt << " [m^2/s] (on the coarsest grid)" <<
+                             "\n   + viscosity LB:        " << setup.viscosity  << " [dx*dx/dt] (on the coarsest grid)" <<
+                             "\n   + omega:               " << setup.omega << " (on the coarsest grid)" <<
+                             "\n   + rho:                 " << setup.rho << " [kg/m^3]" <<
+                             "\n   + inflow velocity:     " << setup.referenceVelocity << " [m/s]" <<
+                             "\n   + lattice velocity:    " << setup.inflowVelocity << " [dx/dt]" <<
+                             "\n   + Reynolds number:     " << setup.reynoldsNumber <<
+                             "\n   + Mach number:         " << setup.machNumber <<
+                             "\n   + dt (coarsest grid):  " << setup.dt << " [s]"
+                             "\n   + #time steps:         " << timeloop.getNrOfTimeSteps() << " (on the coarsest grid, " << ( real_t(1) / setup.dt ) << " for 1s of real time)" <<
+                             "\n   + simulation time:     " << ( real_c( timeloop.getNrOfTimeSteps() ) * setup.dt ) << " [s]" )
+
+   WALBERLA_LOG_INFO_ON_ROOT("Starting Simulation")
+   WcTimingPool timeloopTiming;
+   WcTimer simTimer;
+
+   WALBERLA_MPI_BARRIER()
+
+   simTimer.start();
+   timeloop.run(timeloopTiming);
+   WALBERLA_MPI_BARRIER()
+   simTimer.end();
+   WALBERLA_LOG_INFO_ON_ROOT("Simulation finished")
+   real_t time = simTimer.max();
+   WALBERLA_MPI_SECTION() { walberla::mpi::reduceInplace(time, walberla::mpi::MAX); }
+   performance.logResultOnRoot(setup.timesteps, time);
+
+   const auto reducedTimeloopTiming = timeloopTiming.getReduced();
+   WALBERLA_LOG_RESULT_ON_ROOT("Time loop timing:\n" << *reducedTimeloopTiming)
+
+   printResidentMemoryStatistics();
+   return EXIT_SUCCESS;
+}
\ No newline at end of file
diff --git a/apps/showcases/FlowAroundSphereCPU/FlowAroundSphereCPU.py b/apps/showcases/FlowAroundSphereCPU/FlowAroundSphereCPU.py
new file mode 100644
index 0000000000000000000000000000000000000000..e8f4318719d479a44d8dcaaf5873278ed84cdf0c
--- /dev/null
+++ b/apps/showcases/FlowAroundSphereCPU/FlowAroundSphereCPU.py
@@ -0,0 +1,111 @@
+import sympy as sp
+from pystencils import Target
+
+from pystencils.field import fields
+from pystencils.simp import insert_aliases, insert_constants
+
+from lbmpy import LBStencil, LBMConfig, LBMOptimisation
+from lbmpy.boundaries.boundaryconditions import (ExtrapolationOutflow, UBB, QuadraticBounceBack,
+                                                 FreeSlip, NoSlip, FixedDensity)
+from lbmpy.creationfunctions import create_lb_collision_rule
+from lbmpy.enums import Method, Stencil, SubgridScaleModel
+
+from pystencils_walberla import CodeGeneration, generate_info_header
+from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator
+
+info_header = """
+#pragma once
+#include <map>
+std::map<std::string, std::string> infoMap{{{{"stencil", "{stencil}"}},
+                                           {{"streamingPattern", "{streaming_pattern}"}}, 
+                                           {{"collisionOperator", "{collision_operator}"}}}};
+"""
+
+omega = sp.symbols("omega")
+inlet_velocity = sp.symbols("u_x")
+max_threads = 256
+
+with CodeGeneration() as ctx:
+    target = Target.CPU
+    sweep_params = {'block_size': (128, 1, 1)} if ctx.gpu else {}
+
+    dtype = 'float64'
+    pdf_dtype = 'float64'
+
+    stencil = LBStencil(Stencil.D3Q27)
+    q = stencil.Q
+    dim = stencil.D
+
+    streaming_pattern = 'pull'
+
+    pdfs, pdfs_tmp = fields(f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {pdf_dtype}[3D]", layout='fzyx')
+    velocity_field, density_field = fields(f"velocity({dim}), density(1) : {dtype}[{dim}D]", layout='fzyx')
+    omega_field = fields(f"rr(1) : {dtype}[{dim}D]", layout='fzyx')
+
+    macroscopic_fields = {'density': density_field, 'velocity': velocity_field}
+
+    method_enum = Method.CUMULANT
+    lbm_config = LBMConfig(
+        method=method_enum,
+        stencil=stencil,
+        relaxation_rate=omega_field.center,
+        compressible=True,
+        # subgrid_scale_model=SubgridScaleModel.QR,
+        fourth_order_correction=1e-4 if method_enum == Method.CUMULANT and stencil.Q == 27 else False,
+        field_name='pdfs',
+        streaming_pattern=streaming_pattern,
+    )
+
+    lbm_opt = LBMOptimisation(cse_global=False, cse_pdfs=False)
+
+    collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
+    if lbm_config.method == Method.CUMULANT:
+        collision_rule = insert_constants(collision_rule)
+        collision_rule = insert_aliases(collision_rule)
+    lb_method = collision_rule.method
+
+    freeslip = lbm_boundary_generator("FreeSlip", flag_uid="FreeSlip", boundary_object=FreeSlip(stencil),
+                                      field_data_type=pdf_dtype)
+    no_slip = lbm_boundary_generator(class_name='NoSlip', flag_uid='NoSlip',
+                                     boundary_object=NoSlip(), field_data_type=pdf_dtype)
+
+    quadratic_bounce_back = QuadraticBounceBack(omega, calculate_force_on_boundary=True)
+    no_slip_interpolated = lbm_boundary_generator(class_name='Obstacle', flag_uid='Obstacle',
+                                                  boundary_object=quadratic_bounce_back,
+                                                  field_data_type=pdf_dtype)
+
+    ubb = lbm_boundary_generator(class_name='UBB', flag_uid='UBB',
+                                 boundary_object=UBB((inlet_velocity, 0.0, 0.0), density=1.0, data_type=dtype),
+                                 field_data_type=pdf_dtype)
+
+    outflow_boundary = ExtrapolationOutflow(stencil[4], lb_method)
+    outflow = lbm_boundary_generator(class_name='Outflow', flag_uid='Outflow',
+                                     boundary_object=ExtrapolationOutflow(stencil[4], lb_method),
+                                     field_data_type=pdf_dtype)
+
+    fixed_density = lbm_boundary_generator(class_name='FixedDensity', flag_uid='FixedDensity',
+                                           boundary_object=FixedDensity(1.0),
+                                           field_data_type=pdf_dtype)
+
+    generate_lbm_package(ctx, name="FlowAroundSphereCPU", collision_rule=collision_rule,
+                         lbm_config=lbm_config, lbm_optimisation=lbm_opt,
+                         nonuniform=True, boundaries=[freeslip, no_slip, no_slip_interpolated,
+                                                      ubb, outflow, fixed_density],
+                         macroscopic_fields=macroscopic_fields, gpu_indexing_params=sweep_params,
+                         target=target, data_type=dtype, pdfs_data_type=pdf_dtype,
+                         max_threads=max_threads)
+
+    field_typedefs = {'VelocityField_T': velocity_field,
+                      'ScalarField_T': density_field}
+
+    # Info header containing correct template definitions for stencil and field
+    generate_info_header(ctx, 'FlowAroundSphereInfoHeader',
+                         field_typedefs=field_typedefs)
+
+    infoHeaderParams = {
+        'stencil': stencil.name.lower(),
+        'streaming_pattern': streaming_pattern,
+        'collision_operator': lbm_config.method.name.lower(),
+    }
+
+    ctx.write_file("FlowAroundSphereStaticDefines.h", info_header.format(**infoHeaderParams))
diff --git a/apps/showcases/FlowAroundSphereCPU/GridGeneration.h b/apps/showcases/FlowAroundSphereCPU/GridGeneration.h
new file mode 100644
index 0000000000000000000000000000000000000000..9a9e95430c67c174cc33f58bb5c8e297ded29d74
--- /dev/null
+++ b/apps/showcases/FlowAroundSphereCPU/GridGeneration.h
@@ -0,0 +1,126 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file GridGeneration.h
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+#pragma once
+
+#include "blockforest/Initialization.h"
+#include "blockforest/SetupBlock.h"
+#include "blockforest/SetupBlockForest.h"
+#include "blockforest/loadbalancing/StaticCurve.h"
+
+#include "core/Environment.h"
+#include "core/logging/Initialization.h"
+#include "core/timing/RemainingTimeLogger.h"
+#include "core/timing/TimingPool.h"
+
+#include <string>
+#include <fstream>
+
+#include "Types.h"
+#include "Setup.h"
+#include "Sphere.h"
+
+using namespace walberla;
+
+void createSetupBlockForest(SetupBlockForest& setupBfs, const Setup& setup, const bool useMPIManager=false)
+{
+   WALBERLA_LOG_INFO_ON_ROOT("Generating SetupBlockForest...")
+
+   uint_t numProcesses = setup.numProcesses;
+   const std::string blockForestFilestem = setup.blockForestFilestem;
+
+   if(useMPIManager) {numProcesses = uint_c(mpi::MPIManager::instance()->numProcesses());}
+
+   Sphere Sphere( setup );
+   SphereRefinementSelection SphereRefinementSelection( Sphere, setup.refinementLevels );
+   SphereBlockExclusion SphereBlockExclusion( Sphere );
+
+   setupBfs.addRefinementSelectionFunction(std::function<void(SetupBlockForest &)>(SphereRefinementSelection));
+   setupBfs.addBlockExclusionFunction(SphereBlockExclusion);
+
+   const AABB domain(real_t(0.0), real_t(0.0), real_t(0.0), setup.domainSize[0], setup.domainSize[1], setup.domainSize[2]);
+   setupBfs.addWorkloadMemorySUIDAssignmentFunction(blockforest::uniformWorkloadAndMemoryAssignment);
+   setupBfs.init(domain, setup.rootBlocks[0], setup.rootBlocks[1], setup.rootBlocks[2],
+                         setup.periodic[0], setup.periodic[1], setup.periodic[2]);
+   setupBfs.balanceLoad(blockforest::StaticLevelwiseCurveBalanceWeighted(), numProcesses);
+
+   if(mpi::MPIManager::instance()->numProcesses() > 1)
+      return;
+
+   {
+      std::ostringstream oss;
+      oss << blockForestFilestem << ".bfs";
+      setupBfs.saveToFile(oss.str().c_str());
+   }
+
+   setupBfs.writeVTKOutput(blockForestFilestem);
+
+   WALBERLA_LOG_INFO_ON_ROOT("===========================  BLOCK FOREST STATISTICS ============================");
+   WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << setupBfs.getNumberOfBlocks())
+   for (uint_t level = 0; level <= setup.refinementLevels; level++){
+      const uint_t numberOfBlocks = setupBfs.getNumberOfBlocks(level);
+      WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << numberOfBlocks)
+   }
+
+   const real_t avgBlocksPerProc = real_c(setupBfs.getNumberOfBlocks()) / real_c(setupBfs.getNumberOfProcesses());
+   WALBERLA_LOG_INFO_ON_ROOT("Average blocks per process: " << avgBlocksPerProc);
+
+   const uint_t totalNumberCellsPerBlock = setup.cellsPerBlock[0] * setup.cellsPerBlock[1] * setup.cellsPerBlock[2];
+   const uint_t totalNumberCells         = setupBfs.getNumberOfBlocks() * totalNumberCellsPerBlock;
+   const real_t averageCellsPerGPU       = avgBlocksPerProc * real_c(totalNumberCellsPerBlock);
+
+   const uint_t PDFsPerCell = StorageSpecification_T::inplace ? Stencil_T::Q : 2 * Stencil_T::Q;
+   const uint_t valuesPerCell = (PDFsPerCell + VelocityField_T::F_SIZE + ScalarField_T::F_SIZE);
+   const uint_t sizePerValue = sizeof(StorageSpecification_T::value_type);
+   const double expectedMemory = double_c(totalNumberCells * valuesPerCell * sizePerValue) * 1e-9;
+   const double expectedMemoryPerGPU = double_c(averageCellsPerGPU * valuesPerCell * sizePerValue) * 1e-9;
+
+   WALBERLA_LOG_INFO_ON_ROOT( "Total number of cells will be " << totalNumberCells << " fluid cells (in total on all levels)")
+   WALBERLA_LOG_INFO_ON_ROOT( "Expected total memory demand will be " << expectedMemory << " GB")
+   WALBERLA_LOG_INFO_ON_ROOT( "Average memory demand per GPU will be " << expectedMemoryPerGPU << " GB")
+
+   WALBERLA_LOG_INFO_ON_ROOT("=================================================================================");
+}
+
+void createBlockForest(shared_ptr< BlockForest >& bfs, const Setup& setup)
+{
+   if (mpi::MPIManager::instance()->numProcesses() > 1)
+   {
+      // Load structured block forest from file
+      std::ostringstream oss;
+      oss << setup.blockForestFilestem << ".bfs";
+      const std::string setupBlockForestFilepath = oss.str();
+      std::ifstream infile(setupBlockForestFilepath.c_str());
+      if(!infile.good()){
+         WALBERLA_LOG_WARNING_ON_ROOT("Blockforest was not created beforehand and thus needs to be created on the fly. For large simulation runs this can be a severe problem!")
+         SetupBlockForest setupBfs;
+         createSetupBlockForest(setupBfs, setup, true);
+         bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs);
+      }
+      else{
+         bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()),
+                                               setupBlockForestFilepath.c_str(), false);
+      }
+   }
+   else{
+      SetupBlockForest setupBfs;
+      createSetupBlockForest(setupBfs, setup);
+      bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs);
+   }
+}
\ No newline at end of file
diff --git a/apps/showcases/FlowAroundSphereCPU/Setup.h b/apps/showcases/FlowAroundSphereCPU/Setup.h
new file mode 100644
index 0000000000000000000000000000000000000000..93bc7ac3a111d181e8e1f8e54f43bd9bd0b50b74
--- /dev/null
+++ b/apps/showcases/FlowAroundSphereCPU/Setup.h
@@ -0,0 +1,169 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file Setup.h
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+#pragma once
+
+#include "core/config/Config.h"
+#include "core/DataTypes.h"
+#include "core/math/Vector3.h"
+
+#include <string>
+
+namespace walberla
+{
+struct Setup
+{
+
+   Setup(const Config::BlockHandle & parameters, const Config::BlockHandle & domainParameters,
+         const Config::BlockHandle & boundaries, std::map<std::string, std::string>& infoMap)
+   {
+      blockForestFilestem         = domainParameters.getParameter< std::string >("blockForestFilestem", "blockforest");
+      numProcesses                = domainParameters.getParameter< uint_t >("numberProcesses");
+      const Vector3< real_t > ds  = domainParameters.getParameter< Vector3< real_t > >("domainSize");
+      const real_t coarseMeshSize = parameters.getParameter< real_t >("coarseMeshSize");
+      cellsPerBlock               = domainParameters.getParameter< Vector3< uint_t > >("cellsPerBlock");
+
+      rootBlocks[0] = uint_c(std::ceil( (ds[0] / coarseMeshSize) / real_c(cellsPerBlock[0])));
+      rootBlocks[1] = uint_c(std::ceil( (ds[1] / coarseMeshSize) / real_c(cellsPerBlock[1])));
+      rootBlocks[2] = uint_c(std::ceil( (ds[2] / coarseMeshSize) / real_c(cellsPerBlock[2])));
+
+      cells      = Vector3<uint_t>(rootBlocks[0] * cellsPerBlock[0], rootBlocks[1] * cellsPerBlock[1], rootBlocks[2] * cellsPerBlock[2]);
+      domainSize = Vector3<real_t>(cells) * coarseMeshSize;
+
+      periodic = domainParameters.getParameter< Vector3< bool > >("periodic");
+      refinementLevels = domainParameters.getParameter< uint_t >("refinementLevels");
+
+      sphereDiameter = parameters.getParameter< real_t >("diameterSphere") / coarseMeshSize;
+      sphereRadius   = sphereDiameter / real_c(2.0);
+
+      sphereXPosition = parameters.getParameter< real_t >("SphereXPosition") / coarseMeshSize;
+      sphereYPosition = real_c(cells[1]) / real_c(2.0);
+      sphereZPosition = real_c(cells[2]) / real_c(2.0);
+
+      reynoldsNumber    = parameters.getParameter< real_t >("reynoldsNumber");
+      referenceVelocity = parameters.getParameter< real_t >("referenceVelocity");
+      inflowVelocity    = parameters.getParameter< real_t >("latticeVelocity");
+
+      const real_t speedOfSound = real_c(real_c(1.0) / std::sqrt( real_c(3.0) ));
+      machNumber = inflowVelocity / speedOfSound;
+      viscosity  = real_c((inflowVelocity * sphereDiameter) / reynoldsNumber);
+      omega      = real_c(real_c(1.0) / (real_c(3.0) * viscosity + real_c(0.5)));
+
+      rho = real_c(1.0);
+      dxC = coarseMeshSize;
+      dxF = real_c(coarseMeshSize) / real_c( 1 << refinementLevels );
+      dt = inflowVelocity / referenceVelocity * coarseMeshSize;
+
+      resolutionSphere = parameters.getParameter< real_t >("diameterSphere") / dxF;
+
+      evaluatePressure = parameters.getParameter< bool >("evaluatePressure");
+      pAlpha = parameters.getParameter< Vector3<real_t> >( "pAlpha" );
+      pOmega = parameters.getParameter< Vector3<real_t> >( "pOmega" );
+
+      evaluateStrouhal = parameters.getParameter< bool >("evaluateStrouhal");;
+      pStrouhal = parameters.getParameter< Vector3<real_t> >( "pStrouhal");
+
+      timesteps = parameters.getParameter< uint_t >("timesteps");
+      numGhostLayers = uint_c(2);
+      nbrOfEvaluationPointsForCoefficientExtremas = 100;
+
+      valuesPerCell = (Stencil_T::Q + VelocityField_T::F_SIZE + uint_c(2) * ScalarField_T::F_SIZE);
+      memoryPerCell = memory_t(valuesPerCell * sizeof(PdfField_T::value_type) + uint_c(1));
+      processMemoryLimit = parameters.getParameter< memory_t >( "processMemoryLimit", memory_t( 512 ) ) * memory_t( 1024 * 1024 );
+
+      stencil          = infoMap["stencil"];
+      streamingPattern = infoMap["streamingPattern"];
+      collisionModel   = infoMap["collisionOperator"];
+
+      fluidUID    = FlagUID("Fluid");
+      obstacleUID = FlagUID(boundaries.getParameter< std::string >("sphere"));
+      inflowUID   = FlagUID(boundaries.getParameter< std::string >("inflow"));
+      outflowUID  = FlagUID(boundaries.getParameter< std::string >("outflow"));
+      wallUID     = FlagUID(boundaries.getParameter< std::string >("walls"));
+
+   }
+
+   void writeParameterHeader(std::ofstream& file)
+   {
+      file << "ReynoldsNumber" << "," << "machNumber" << "," << "coarseMeshSize" << "," << "fineMeshSize" << "," << "resolutionSphere" << ",";
+      file << "refinementLevels" << "," << "stencil" << "," << "streamingPattern" << "," << "collisionOperator" << "," << "omega-coarse" << "\n";
+
+      file << reynoldsNumber << "," << machNumber << "," << dxC << "," << dxF << "," << resolutionSphere << ",";
+      file << refinementLevels << "," << stencil << "," << streamingPattern << "," << collisionModel << "," << omega << "\n";
+   }
+
+   std::string blockForestFilestem;
+   uint_t numProcesses;
+
+   Vector3<uint_t> rootBlocks;
+   Vector3<uint_t> cells;
+   Vector3<real_t> domainSize;
+
+   Vector3< bool > periodic;
+   uint_t refinementLevels;
+
+   Vector3< uint_t > cellsPerBlock;
+   uint_t numGhostLayers;
+
+   real_t sphereXPosition;
+   real_t sphereYPosition;
+   real_t sphereZPosition;
+   real_t sphereRadius;
+   real_t sphereDiameter;
+   real_t resolutionSphere;
+
+   uint_t nbrOfEvaluationPointsForCoefficientExtremas;
+
+   bool evaluatePressure;
+   Vector3< real_t > pAlpha;
+   Vector3< real_t > pOmega;
+
+   bool evaluateStrouhal;
+   Vector3< real_t > pStrouhal;
+
+   real_t machNumber;
+   real_t reynoldsNumber;
+
+   real_t viscosity;
+   real_t omega;
+   real_t rho;
+   real_t inflowVelocity;
+   real_t referenceVelocity;
+   real_t dxC;
+   real_t dxF;
+   real_t dt;
+
+   uint_t timesteps;
+
+   uint_t valuesPerCell;
+   memory_t memoryPerCell;
+   memory_t processMemoryLimit;
+
+   std::string stencil;
+   std::string streamingPattern;
+   std::string collisionModel;
+
+   FlagUID fluidUID;
+   FlagUID obstacleUID;
+   FlagUID inflowUID;
+   FlagUID outflowUID;
+   FlagUID wallUID;
+};
+
+}
\ No newline at end of file
diff --git a/apps/showcases/FlowAroundSphereCPU/Sphere.cpp b/apps/showcases/FlowAroundSphereCPU/Sphere.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7e31c3d3eb97ca19f8eb2d2c8ac5b3911da438f2
--- /dev/null
+++ b/apps/showcases/FlowAroundSphereCPU/Sphere.cpp
@@ -0,0 +1,193 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file Sphere.cpp
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#include "Sphere.h"
+
+namespace walberla
+{
+
+bool Sphere::contains(const Vector3< real_t >& point) const
+{
+   return (point - center_).sqrLength() <= radius2_;
+}
+
+bool Sphere::contains(const AABB& aabb) const
+{
+   Vector3< real_t > p[8];
+   p[0].set(aabb.xMin(), aabb.yMin(), aabb.zMin());
+   p[1].set(aabb.xMax(), aabb.yMin(), aabb.zMin());
+   p[2].set(aabb.xMin(), aabb.yMax(), aabb.zMin());
+   p[3].set(aabb.xMax(), aabb.yMax(), aabb.zMin());
+   p[4].set(aabb.xMin(), aabb.yMin(), aabb.zMax());
+   p[5].set(aabb.xMax(), aabb.yMin(), aabb.zMax());
+   p[6].set(aabb.xMin(), aabb.yMax(), aabb.zMax());
+   p[7].set(aabb.xMax(), aabb.yMax(), aabb.zMax());
+   return contains(p[0]) && contains(p[1]) && contains(p[2]) && contains(p[3]) && contains(p[4]) && contains(p[5]) &&
+          contains(p[6]) && contains(p[7]);
+}
+
+real_t Sphere::delta(const Vector3< real_t >& fluid, const Vector3< real_t >& boundary) const
+{
+   WALBERLA_ASSERT(!contains(fluid))
+   WALBERLA_ASSERT(contains(boundary))
+
+   // http://devmag.org.za/2009/04/17/basic-collision-detection-in-2d-part-2/
+   const Vector3< real_t > f = fluid - center_;
+   const Vector3< real_t > d = (boundary - center_) - f;
+
+   const real_t dDotd = d[0] * d[0] + d[1] * d[1] + d[2] * d[2];
+   const real_t fDotf = f[0] * f[0] + f[1] * f[1] + f[2] * f[2];
+   const real_t dDotf = d[0] * f[0] + d[1] * f[1] + d[2] * f[2];
+
+   const real_t b = real_c(2.0) * dDotf;
+   const real_t c = fDotf - radius2_;
+
+   const real_t bb4ac = b * b - (real_c(4.0) * dDotd * c);
+   WALBERLA_CHECK_GREATER_EQUAL(bb4ac, real_c(0.0))
+
+   const real_t sqrtbb4ac = std::sqrt(bb4ac);
+   const real_t alpha = std::min((-b + sqrtbb4ac) / (real_c(2.0) * dDotd), (-b - sqrtbb4ac) / (real_c(2.0) * dDotd));
+
+   WALBERLA_CHECK_GREATER_EQUAL(alpha, real_c(0.0))
+   WALBERLA_CHECK_LESS_EQUAL(alpha, real_c(1.0))
+
+   return alpha;
+}
+
+void Sphere::setupBoundary(const std::shared_ptr< StructuredBlockForest >& sbfs, const BlockDataID flagFieldID)
+{
+
+   for (auto bIt = sbfs->begin(); bIt != sbfs->end(); ++bIt)
+   {
+      auto flagField       = bIt->getData< FlagField_T >(flagFieldID);
+      const FlagField_T::flag_t inflowFlag  = flagField->registerFlag(setup_.inflowUID);
+      const FlagField_T::flag_t outflowFlag = flagField->registerFlag(setup_.outflowUID);
+      const FlagField_T::flag_t wallFlag    = flagField->registerFlag(setup_.wallUID);
+
+      const cell_idx_t gls = cell_idx_c(flagField->nrOfGhostLayers()) - cell_idx_c(1);
+
+      CellInterval blockBB(-1, -1, -1,
+                           cell_idx_c(setup_.cellsPerBlock[0]), cell_idx_c(setup_.cellsPerBlock[1]), cell_idx_c(setup_.cellsPerBlock[2]));
+
+      // inflow WEST
+      if(sbfs->atDomainXMinBorder(*bIt)){
+         CellInterval west(blockBB.xMin() - gls, blockBB.yMin() - gls, blockBB.zMin() - gls, blockBB.xMin(),
+                           blockBB.yMax() + gls, blockBB.zMax() + gls);
+         setBoundaryFromCellInterval(west, inflowFlag, flagField);
+      }
+
+      // outflow EAST
+      if(sbfs->atDomainXMaxBorder(*bIt)){
+         CellInterval east(blockBB.xMax(), blockBB.yMin() - gls, blockBB.zMin() - gls, blockBB.xMax() + gls,
+                           blockBB.yMax() + gls, blockBB.zMax() + gls);
+         setBoundaryFromCellInterval(east, outflowFlag, flagField);
+      }
+
+      // SOUTH
+      if(sbfs->atDomainYMinBorder(*bIt))
+      {
+         CellInterval south(blockBB.xMin() - gls, blockBB.yMin() - gls, blockBB.zMin() - gls, blockBB.xMax() + gls,
+                            blockBB.yMin(), blockBB.zMax() + gls);
+         setBoundaryFromCellInterval(south, wallFlag, flagField);
+      }
+
+      // NORTH
+      if(sbfs->atDomainYMaxBorder(*bIt)){
+         CellInterval north( blockBB.xMin() - gls, blockBB.yMax(), blockBB.zMin() - gls, blockBB.xMax() + gls,
+                             blockBB.yMax() + gls, blockBB.zMax() + gls );
+         setBoundaryFromCellInterval(north, wallFlag, flagField);
+      }
+
+      // BOTTOM
+      if(sbfs->atDomainZMinBorder(*bIt)){
+         CellInterval bottom(blockBB.xMin() - gls, blockBB.yMin() - gls, blockBB.zMin() - gls, blockBB.xMax() + gls,
+                             blockBB.yMax() + gls, blockBB.zMin());
+         setBoundaryFromCellInterval(bottom, wallFlag, flagField);
+      }
+
+      // TOP
+      if(sbfs->atDomainZMaxBorder(*bIt)){
+         CellInterval top(blockBB.xMin() - gls, blockBB.yMin() - gls, blockBB.zMax(), blockBB.xMax() + gls,
+                          blockBB.yMax() + gls, blockBB.zMax() + gls);
+         setBoundaryFromCellInterval(top, wallFlag, flagField);
+      }
+   }
+
+   checkConsistency(sbfs, flagFieldID);
+   setupSphereBoundary(sbfs, flagFieldID);
+}
+
+void Sphere::setBoundaryFromCellInterval(CellInterval& cells, const FlagField_T::flag_t flag, FlagField_T* flagField)
+{
+
+   for (auto cell = cells.begin(); cell != cells.end(); ++cell){
+      if(flagField->get(cell->x(), cell->y(), cell->z()) == FlagField_T::flag_t(0))
+         flagField->addFlag(cell->x(), cell->y(), cell->z(), flag);
+   }
+}
+
+void Sphere::checkConsistency(const std::shared_ptr< StructuredBlockForest >& sbfs, const BlockDataID flagFieldID){
+   const uint_t depth = sbfs->getDepth();
+   for (auto bIt = sbfs->begin(); bIt != sbfs->end(); ++bIt)
+   {
+      Block& b             = dynamic_cast< Block& >(*bIt);
+      auto flagField       = b.getData< FlagField_T >(flagFieldID);
+      if (sbfs->getLevel(b) < depth){
+         for( auto it = flagField->beginWithGhostLayer(1); it != flagField->end(); ++it ){
+            Vector3< real_t > cellCenter = sbfs->getBlockLocalCellCenter( b, it.cell() );
+            sbfs->mapToPeriodicDomain(cellCenter);
+            WALBERLA_CHECK(!contains(cellCenter), "The sphere must be completely located on the finest level")
+         }
+      }
+   }
+}
+
+void Sphere::setupSphereBoundary(const std::shared_ptr< StructuredBlockForest >& sbfs, const BlockDataID flagFieldID){
+   const uint_t depth = sbfs->getDepth();
+   for (auto bIt = sbfs->begin(); bIt != sbfs->end(); ++bIt)
+   {
+      Block& b             = dynamic_cast< Block& >(*bIt);
+      auto flagField       = b.getData< FlagField_T >(flagFieldID);
+      uint8_t obstacleFlag = flagField->registerFlag(setup_.obstacleUID);
+
+      if (sbfs->getLevel(b) == depth){
+         for( auto it = flagField->beginWithGhostLayer(1); it != flagField->end(); ++it ){
+            Vector3< real_t > cellCenter = sbfs->getBlockLocalCellCenter( b, it.cell() );
+            sbfs->mapToPeriodicDomain(cellCenter);
+            if (contains(cellCenter)) { flagField->addFlag(it.x(), it.y(), it.z(), obstacleFlag); }
+         }
+      }
+   }
+}
+
+real_t wallDistance::operator()(const Cell& fluidCell, const Cell& boundaryCell,
+                                const shared_ptr< StructuredBlockForest >& SbF, IBlock& block) const
+{
+   Vector3< real_t > boundary = SbF->getBlockLocalCellCenter( block, boundaryCell );
+   Vector3< real_t > fluid = SbF->getBlockLocalCellCenter( block, fluidCell );
+   SbF->mapToPeriodicDomain(boundary);
+   SbF->mapToPeriodicDomain(fluid);
+
+   WALBERLA_CHECK(!sphere_.contains(fluid), "fluid cell is in contained in sphere (" << fluid[0] << ", " << fluid[1] << ", " << fluid[2] << "). The block local cell is " << fluidCell)
+   WALBERLA_CHECK(sphere_.contains(boundary), "boundary cell is not in contained in sphere (" << boundary[0] << ", " << boundary[1] << ", " << boundary[2] << "). The block local cell is " << boundaryCell)
+
+   return sphere_.delta( fluid, boundary );
+}
+} // namespace walberla
\ No newline at end of file
diff --git a/apps/showcases/FlowAroundSphereCPU/Sphere.h b/apps/showcases/FlowAroundSphereCPU/Sphere.h
new file mode 100644
index 0000000000000000000000000000000000000000..9f9913e62e0dfa1b50d28454d1c7f0c80cb22645
--- /dev/null
+++ b/apps/showcases/FlowAroundSphereCPU/Sphere.h
@@ -0,0 +1,145 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file Sphere.h
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+#pragma once
+
+#include "blockforest/SetupBlock.h"
+#include "blockforest/SetupBlockForest.h"
+#include "blockforest/StructuredBlockForest.h"
+
+#include "domain_decomposition/IBlock.h"
+
+#include "field/FlagUID.h"
+
+#include "core/DataTypes.h"
+#include "core/math/AABB.h"
+#include "core/math/Vector3.h"
+#include "core/cell/Cell.h"
+
+#include "stencil/D3Q7.h"
+#include "stencil/D3Q27.h"
+
+#include "Types.h"
+#include "Setup.h"
+
+namespace walberla
+{
+
+class Sphere
+{
+ public:
+   Sphere(const Setup& setup) : setup_(setup)
+   {
+      const real_t px = setup_.sphereXPosition * setup_.dxC;
+      const real_t py = setup_.sphereYPosition * setup_.dxC;
+      const real_t pz = setup_.sphereZPosition * setup_.dxC;
+
+      center_  = Vector3< real_t >(px, py, pz);
+      radius_  = setup_.sphereRadius * setup_.dxC;
+      radius2_ = radius_ * radius_;
+   }
+
+   bool operator()(const Vector3< real_t >& point) const { return contains(point); }
+
+   bool contains(const Vector3< real_t >& point) const;
+   bool contains(const AABB& aabb) const;
+
+   real_t delta(const Vector3< real_t >& fluid, const Vector3< real_t >& boundary) const;
+   Setup getSetup(){ return setup_; }
+   void setupBoundary(const std::shared_ptr< StructuredBlockForest >& sbfs, const BlockDataID flagFieldID);
+   void checkConsistency(const std::shared_ptr< StructuredBlockForest >& sbfs, const BlockDataID flagFieldID);
+   void setupSphereBoundary(const std::shared_ptr< StructuredBlockForest >& sbfs, const BlockDataID flagFieldID);
+   void setBoundaryFromCellInterval(CellInterval& cells, const FlagField_T::flag_t flag, FlagField_T* flagField);
+
+ private:
+   Setup setup_;
+   Vector3< real_t > center_;
+   real_t radius_;
+   real_t radius2_;
+
+}; // class Sphere
+
+class SphereRefinementSelection
+{
+ public:
+   SphereRefinementSelection(const Sphere& sphere, const uint_t level)
+      : sphere_(sphere), level_(level)
+   {
+      auto setup = sphere_.getSetup();
+      const real_t px = setup.sphereXPosition * setup.dxC;
+      const real_t py = setup.sphereYPosition * setup.dxC;
+      const real_t pz = setup.sphereZPosition * setup.dxC;
+
+      auto center  = Vector3< real_t >(px, py, pz);
+      const real_t sphereRadius = setup.sphereRadius * setup.dxC;
+      const real_t bufferDistance = real_c(5.0) * setup.dxF;
+      const real_t d = sphereRadius + bufferDistance;
+
+      sphereBoundingBox_ = AABB(center[0] - d, center[1] - d, center[2] - d,
+                          center[0] + (real_c(2.5) * sphereRadius), center[1] + d, center[2] + d);
+   }
+
+   void operator()(SetupBlockForest& forest)
+   {
+      for (auto block = forest.begin(); block != forest.end(); ++block)
+      {
+         const AABB& aabb = block->getAABB();
+
+         if (block->getLevel() < level_ && sphereBoundingBox_.intersects(aabb))
+            block->setMarker(true);
+      }
+   }
+
+ private:
+   Sphere sphere_;
+   uint_t level_;
+   AABB sphereBoundingBox_;
+
+}; // class SphereRefinementSelection
+
+class SphereBlockExclusion
+{
+ public:
+   SphereBlockExclusion(const Sphere& sphere) : sphere_(sphere) {}
+
+   bool operator()(const blockforest::SetupBlock& block)
+   {
+      const AABB aabb = block.getAABB();
+      return static_cast< bool >(sphere_.contains(aabb));
+   }
+
+ private:
+   Sphere sphere_;
+
+}; // class SphereBlockExclusion
+
+
+class wallDistance
+{
+ public:
+   wallDistance(const Sphere& sphere) : sphere_(sphere) {}
+
+   real_t operator()(const Cell& fluidCell, const Cell& boundaryCell, const shared_ptr< StructuredBlockForest >& SbF,
+                     IBlock& block) const;
+
+ private:
+   Sphere sphere_;
+}; // class wallDistance
+
+} // namespace walberla
\ No newline at end of file
diff --git a/apps/showcases/FlowAroundSphereCPU/Types.h b/apps/showcases/FlowAroundSphereCPU/Types.h
new file mode 100644
index 0000000000000000000000000000000000000000..9a2d2af3da6cf659ed9fbdae19919bea3873f861
--- /dev/null
+++ b/apps/showcases/FlowAroundSphereCPU/Types.h
@@ -0,0 +1,49 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file Types.h
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+# pragma once
+
+#include "domain_decomposition/BlockDataID.h"
+
+#include "lbm_generated/field/PdfField.h"
+
+#include "FlowAroundSphereInfoHeader.h"
+
+using namespace walberla;
+
+using StorageSpecification_T = lbm::FlowAroundSphereCPUStorageSpecification;
+using Stencil_T              = StorageSpecification_T::Stencil;
+using CommunicationStencil_T = StorageSpecification_T::CommunicationStencil;
+
+using PdfField_T           = lbm_generated::PdfField< StorageSpecification_T >;
+using FlagField_T          = FlagField< uint8_t >;
+
+struct IDs
+{
+   BlockDataID pdfField;
+   BlockDataID velocityField;
+   BlockDataID densityField;
+   BlockDataID omegaField;
+   BlockDataID flagField;
+
+   BlockDataID pdfFieldGPU;
+   BlockDataID velocityFieldGPU;
+   BlockDataID densityFieldGPU;
+   BlockDataID omegaFieldGPU;
+};