From 9549e956a26462ab305c51ed3735e5b740d0c0ad Mon Sep 17 00:00:00 2001
From: Markus Holzer <mholzer@ldmpe865h.onecert.fr>
Date: Wed, 29 Jan 2025 16:14:44 +0100
Subject: [PATCH] [skip ci] update

---
 apps/showcases/PorousMediaGPU/CMakeLists.txt  |   4 +-
 apps/showcases/PorousMediaGPU/Inflow.cpp      | 143 +++++++++++++++
 apps/showcases/PorousMediaGPU/Inflow.h        |  98 ++++++++++
 .../PorousMediaGPU/PorousMediaGPU.cpp         | 170 +++--------------
 .../PorousMediaGPU/PorousMediaGPU.prm         |  18 +-
 apps/showcases/PorousMediaGPU/util.cpp        |  92 ++++++++++
 apps/showcases/PorousMediaGPU/util.h          |  33 ++++
 apps/showcases/PorousMediaGPU/utilGPU.cu      | 171 ++++++++++++++++++
 apps/showcases/PorousMediaGPU/utilGPU.h       |  37 ++++
 9 files changed, 614 insertions(+), 152 deletions(-)
 create mode 100644 apps/showcases/PorousMediaGPU/Inflow.cpp
 create mode 100644 apps/showcases/PorousMediaGPU/Inflow.h
 create mode 100644 apps/showcases/PorousMediaGPU/util.cpp
 create mode 100644 apps/showcases/PorousMediaGPU/util.h
 create mode 100644 apps/showcases/PorousMediaGPU/utilGPU.cu
 create mode 100644 apps/showcases/PorousMediaGPU/utilGPU.h

diff --git a/apps/showcases/PorousMediaGPU/CMakeLists.txt b/apps/showcases/PorousMediaGPU/CMakeLists.txt
index 0d69de5d8..5de0f2ccf 100644
--- a/apps/showcases/PorousMediaGPU/CMakeLists.txt
+++ b/apps/showcases/PorousMediaGPU/CMakeLists.txt
@@ -18,11 +18,11 @@ waLBerla_generate_target_from_python(NAME PorousMediaGPUCodeGen
 
 if (WALBERLA_BUILD_WITH_CUDA OR WALBERLA_BUILD_WITH_HIP)
     waLBerla_add_executable ( NAME PorousMediaGPU
-            FILES PorousMediaGPU.cpp GridGeneration.h Setup.h Types.h InitializerFunctions.cpp TPMS.cpp
+            FILES PorousMediaGPU.cpp GridGeneration.h Setup.h Types.h InitializerFunctions.cpp TPMS.cpp Inflow.cpp util.cpp utilGPU.cu
             DEPENDS blockforest boundary core field gpu lbm_generated postprocessing stencil timeloop vtk PorousMediaGPUCodeGen )
 else()
     waLBerla_add_executable ( NAME PorousMediaGPU
-            FILES PorousMediaGPU.cpp GridGeneration.h Setup.h Types.h InitializerFunctions.cpp TPMS.cpp
+            FILES PorousMediaGPU.cpp GridGeneration.h Setup.h Types.h InitializerFunctions.cpp TPMS.cpp Inflow.cpp util.cpp
             DEPENDS blockforest boundary core field lbm_generated postprocessing stencil timeloop vtk PorousMediaGPUCodeGen )
 
 endif(WALBERLA_BUILD_WITH_CUDA OR WALBERLA_BUILD_WITH_HIP)
diff --git a/apps/showcases/PorousMediaGPU/Inflow.cpp b/apps/showcases/PorousMediaGPU/Inflow.cpp
new file mode 100644
index 000000000..f3b009b41
--- /dev/null
+++ b/apps/showcases/PorousMediaGPU/Inflow.cpp
@@ -0,0 +1,143 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file InitializerFunctions.h
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+#include "Inflow.h"
+
+namespace walberla{
+
+void Inflow::logging(){
+    WALBERLA_LOG_INFO_ON_ROOT( "Using turbulent injection with:" << std::setprecision(16) <<
+      "\n   + kin. viscosity:        " << nu_ << " [m^2/s]" <<
+      "\n   + uRMS:                  " << urms_ <<
+      "\n   + characteristicLength:  " << le_ <<
+      "\n   + numberOfFourrierModes: " << n_ <<
+      "\n   + alpha:                 " << alpha_ <<
+      "\n   + kappa VKP:             " << kappaVKP_ <<
+      "\n   + epsilon VKP:           " << epsilonVKP_ <<
+      "\n   + kappa Kolmogorov:      " << kappaKolmogorov_ <<
+      "\n   + firstMode:             " << firstMode_ <<
+      "\n   + lastMode:              " << lastMode_)
+}
+
+void Inflow::updateCPU(){
+
+    const real_t t = real_c(executionCounter_) * dt_;
+    for (auto& block : *blocks_){
+        auto velocityFieldInflowSlice = block.template getData< Field_T >(ids_.velocityFieldInflowSlice);
+        const cell_idx_t zSize = cell_idx_c( velocityFieldInflowSlice->zSize() );
+        const cell_idx_t ySize = cell_idx_c( velocityFieldInflowSlice->ySize() );
+        const cell_idx_t gl = cell_idx_c( velocityFieldInflowSlice->nrOfGhostLayers() );
+
+        WALBERLA_ASSERT_EQUAL(blocks_->getLevel( block ), uint_c(0), "Inflow must be located on coarsest resolution")
+
+        for( cell_idx_t z = 0; z < zSize; ++z ) { 
+            for( cell_idx_t y = 0; y < ySize; ++y ){
+                auto minCorner = block.getAABB().minCorner();
+
+                const real_t zCenter = (globalMinCorner_[2] + ( real_c( z ) + real_c(0.5) ) * dx_) + minCorner[2];
+                const real_t yCenter = (globalMinCorner_[1] + ( real_c( y ) + real_c(0.5) ) * dx_) + minCorner[1];
+
+                Vector3<real_t> p(real_c(0.0), yCenter, zCenter);
+
+                const Vector3<real_t> v = summOfFourrierModes(p, uc_, kappa_, ut_, t);
+                // velocityFieldInflowSlice->get(0, y, z, 0) = latticeVelocity_ + v[0];
+                velocityFieldInflowSlice->get(0, y, z, 0) = v[0];
+                velocityFieldInflowSlice->get(0, y, z, 1) = v[1];
+                velocityFieldInflowSlice->get(0, y, z, 2) = v[2];
+            } 
+        }
+    }
+    gpu::fieldCpy< GPUField_T, Field_T >(blocks_, ids_.velocityFieldInflowSliceGPU, ids_.velocityFieldInflowSlice);
+    ++executionCounter_;
+}
+
+void Inflow::updateGPU(){
+
+    const real_t t = real_c(executionCounter_) * dt_;
+    for (auto& block : *blocks_){
+        
+        auto blockMinCorner = block.getAABB().min();
+
+        auto velocityGPU = block.template getData< GPUField_T >(ids_.velocityFieldInflowSliceGPU);
+        updateVelocityPlane(velocityGPU, kappaGPU_, utGPU_, globalMinCorner_, blockMinCorner, uc_, 
+                            dx_, t, n_, latticeVelocity_, block_, grid_);
+
+    }
+    ++executionCounter_;
+}
+
+void Inflow::init(){
+    nu_    = setup_.kinViscosity;
+    urms_  = setup_.urms;
+    le_    = setup_.characteristicLength;
+    n_     = setup_.numberOfFourrierModes;
+    dt_    = setup_.dt;
+    dx_    = setup_.dxC;
+    uc_[0] = setup_.uc[0];
+    uc_[1] = setup_.uc[1];
+    uc_[2] = setup_.uc[2];
+
+    latticeVelocity_ = setup_.latticeVelocity;
+
+    alpha_           = (real_c(55.0) * std::tgamma(real_c(5.0) / real_c(6.0))) / (real_c(9.0) * std::sqrt(math::pi) * std::tgamma(real_c(1.0) / real_c(3.0)));
+    kappaVKP_        = (real_c(9.0) * math::pi * alpha_) / (real_c(55.0) * le_);
+    // epsilonVKP_      = (std::pow(alpha_ * std::tgamma(real_c(2.0)/real_c(3.0)), (real_c(3.0)/real_c(2.0)))) * (real_c(0.5) * kappaVKP_ * std::pow(urms_, real_c(3.0)));
+    epsilonVKP_      = std::pow(urms_, real_c(3.0)) / le_;
+    kappaKolmogorov_ = std::pow(epsilonVKP_ / std::pow(nu_, real_c(3.0)), real_c(1.0)/real_c(4.0));
+
+    firstMode_ = std::log10(kappaVKP_ / real_c(1000.0));
+    lastMode_  = std::log10(kappaKolmogorov_ * real_c(1000.0));
+    const real_t linDeltaKappa = (lastMode_ - firstMode_) / (real_c(n_) - real_c(1.0));
+
+    kappa_.resize(n_);
+    ut_.resize(n_);
+    real_t utSquaredSum = 0.0;
+    for(uint_t i = 0; i < n_; ++i){
+        const real_t kappaN  = std::pow(real_c(10.0), firstMode_ + (real_c(i) * linDeltaKappa));
+        const real_t kappaN1 = std::pow(real_c(10.0), firstMode_ + (real_c(i + 1) * linDeltaKappa));
+        const real_t deltaKappa = kappaN1 - kappaN;
+
+        kappa_[i] = kappaN;
+        const real_t tmp = std::sqrt(getKarmanPaoEnergy(kappaN, alpha_, kappaVKP_, kappaKolmogorov_, urms_) * deltaKappa);
+        ut_[i] = tmp;
+        utSquaredSum += tmp*tmp;
+    }
+
+    const real_t correctionTKE = std::sqrt(((real_c(3.0) / real_c(2.0)) * (urms_*urms_)) / utSquaredSum);
+
+    for(uint_t i = 0; i < n_; ++i){
+        ut_[i] *= correctionTKE;
+    }
+
+    globalMinCorner_ = blocks_->getDomain().min();
+
+    const Vector3<uint_t> s(n_, setup_.cellsPerBlock[1], setup_.cellsPerBlock[2]);
+    getBlockAndGridSize(s, block_, grid_);
+
+    WALBERLA_LOG_DEVEL_VAR(block_)
+    WALBERLA_LOG_DEVEL_VAR(grid_)
+
+    WALBERLA_GPU_CHECK(gpuMalloc((void **)&kappaGPU_, sizeof(real_t) * kappa_.size() ));
+    WALBERLA_GPU_CHECK(gpuMemcpy( kappaGPU_, &kappa_[0], sizeof(real_t) * kappa_.size(), gpuMemcpyHostToDevice ));
+
+    WALBERLA_GPU_CHECK(gpuMalloc((void **)&utGPU_, sizeof(real_t) * ut_.size() ));
+    WALBERLA_GPU_CHECK(gpuMemcpy( utGPU_, &ut_[0], sizeof(real_t) * ut_.size(), gpuMemcpyHostToDevice ));
+}
+
+} // namespace walberla
diff --git a/apps/showcases/PorousMediaGPU/Inflow.h b/apps/showcases/PorousMediaGPU/Inflow.h
new file mode 100644
index 000000000..6950214eb
--- /dev/null
+++ b/apps/showcases/PorousMediaGPU/Inflow.h
@@ -0,0 +1,98 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file Inflow.h
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+#pragma once
+
+#include "core/logging/Initialization.h"
+#include "core/math/Constants.h"
+
+#include "gpu/GPUWrapper.h"
+#include "gpu/ErrorChecking.h"
+#include "gpu/FieldCopy.h"
+
+// #include <curand_kernel.h>
+
+#include "Setup.h"
+#include "util.h"
+#include "utilGPU.h"
+
+namespace walberla{
+
+class Inflow
+{
+ public:
+   Inflow(const std::shared_ptr< StructuredBlockForest >& blocks, const Setup& setup, const IDs& ids)
+      : blocks_(blocks), setup_(setup), ids_(ids), executionCounter_(uint_c(0))
+   {
+       init();
+       logging();
+   }
+
+   using Field_T = walberla::field::GhostLayerField<real_t, 3>;
+   using GPUField_T = gpu::GPUField< real_t >;
+
+   ~Inflow(){
+    WALBERLA_GPU_CHECK(gpuFree(kappaGPU_))
+    WALBERLA_GPU_CHECK(gpuFree(utGPU_))
+   }
+
+   void init();
+   void logging();
+
+   std::function<void()>  updateCPUFunctor() {return [this](){ Inflow::updateCPU();};}
+   std::function<void()>  updateGPUFunctor() {return [this](){ Inflow::updateGPU();};}
+
+   void updateCPU();
+   void updateGPU();
+
+ private:
+   const std::shared_ptr< StructuredBlockForest > blocks_;
+   const Setup setup_;
+   const IDs ids_;
+
+   real_t nu_;
+   real_t urms_;
+   real_t le_;
+   uint_t n_;
+   real_t alpha_;
+   real_t kappaVKP_;
+   real_t epsilonVKP_;
+   real_t kappaKolmogorov_;
+
+   real_t dt_;
+   real_t dx_;
+   Vector3<real_t> uc_;
+   real_t latticeVelocity_;
+
+   real_t firstMode_;
+   real_t lastMode_;
+   std::vector<real_t> kappa_;
+   std::vector<real_t> ut_;
+
+   uint_t executionCounter_; // number of times operator() has been called
+
+   Vector3<real_t> globalMinCorner_;
+
+   Vector3<uint32_t> block_;
+   Vector3<uint32_t> grid_;
+   real_t *kappaGPU_;
+   real_t *utGPU_;
+
+}; // class Inflow
+} // namespace walberla
diff --git a/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp b/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp
index 90c6326aa..63354ab57 100644
--- a/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp
+++ b/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp
@@ -29,7 +29,6 @@
 #include "core/logging/Logging.h"
 #include "core/math/Vector3.h"
 #include "core/math/Constants.h"
-#include "core/math/Random.h"
 #include "core/Environment.h"
 #include "core/mpi/MPIManager.h"
 #include "core/MemoryUsage.h"
@@ -80,6 +79,7 @@
 #include <iostream>
 #include <memory>
 
+#include "Inflow.h"
 #include "InitializerFunctions.h"
 #include "GridGeneration.h"
 #include "Setup.h"
@@ -110,142 +110,6 @@ inline Vector3< uint_t > inflowSliceSize( const shared_ptr< StructuredBlockStora
    return Vector3<uint_t>( uint_c(1), blocks->getNumberOfYCells( *block ), blocks->getNumberOfZCells( *block ) );
 }
 
-template< typename Field_T >
-class Inflow
-{
- public:
-   Inflow(const std::shared_ptr< StructuredBlockForest >& blocks, const Setup& setup, const IDs& ids)
-      : blocks_(blocks), setup_(setup), ids_(ids), executionCounter_(uint_c(0))
-   {
-      nu_   = setup_.kinViscosity;
-      urms_ = setup_.urms;
-      le_   = setup_.characteristicLength;
-      n_    = setup_.numberOfFourrierModes;
-      dt_   = setup_.dt;
-      uc_   = setup_.uc;
-
-      alpha_           = (real_c(55.0) * std::tgamma(real_c(5.0) / real_c(6.0))) / (real_c(9.0) * std::sqrt(math::pi) * std::tgamma(real_c(1.0) / real_c(3.0)));
-      kappaVKP_        = (real_c(9.0) * math::pi * alpha_) / (real_c(55.0) * le_);
-      epsilonVKP_      = (std::pow(alpha_ * std::tgamma(real_c(2.0)/real_c(3.0)), (real_c(3.0)/real_c(2.0)))) * (real_c(0.5) * kappaVKP_ * std::pow(urms_, real_c(3.0)));
-      kappaKolmogorov_ = std::pow(epsilonVKP_ / std::pow(nu_, real_c(3.0)), real_c(1.0)/real_c(4.0));
-
-      s_ = std::log10(kappaVKP_ / real_c(2.0));
-      e_ = std::log10(kappaKolmogorov_ * real_c(2.0));
-      firstMode_ = std::pow(real_c(10.0), s_);
-      lastMode_  = std::pow(real_c(10.0), e_);
-      deltaKappa_ = (lastMode_ - firstMode_) / (real_c(n_) - real_c(1.0));
-
-      logging();
-   }
-
-   void operator()(){
-      ++executionCounter_;
-
-      // only evaluate in given intervals
-      // if (executionCounter_ % interval_ != uint_c(0)) { return; }
-
-      // update the inflow velocity plane
-      update();
-   }
-
-   void logging(){
-      WALBERLA_LOG_INFO_ON_ROOT( "Using turbulent injection with:" << std::setprecision(16) <<
-      "\n   + kin. viscosity:        " << nu_ << " [m^2/s]" <<
-      "\n   + uRMS:                  " << urms_ <<
-      "\n   + characteristicLength:  " << le_ <<
-      "\n   + numberOfFourrierModes: " << n_ <<
-      "\n   + alpha:                 " << alpha_ <<
-      "\n   + kappa VKP:             " << kappaVKP_ <<
-      "\n   + epsilon VKP:           " << epsilonVKP_ <<
-      "\n   + kappa Kolmogorov:      " << kappaKolmogorov_ <<
-      "\n   + firstMode:             " << firstMode_ <<
-      "\n   + lastMode:              " << lastMode_)
-   }
-
- private:
-   const std::shared_ptr< StructuredBlockForest > blocks_;
-   const Setup setup_;
-   const IDs ids_;
-
-   real_t nu_;
-   real_t urms_;
-   real_t le_;
-   uint_t n_;
-   real_t alpha_;
-   real_t kappaVKP_;
-   real_t epsilonVKP_;
-   real_t kappaKolmogorov_;
-
-   real_t dt_;
-   Vector3<real_t> uc_;
-
-   real_t s_;
-   real_t e_;
-   real_t firstMode_;
-   real_t lastMode_;
-   real_t deltaKappa_;
-
-   uint_t executionCounter_; // number of times operator() has been called
-
-   void update(){
-      WALBERLA_LOG_INFO_ON_ROOT("Update inflow")
-      for (auto& block : *blocks_){
-      auto velocityFieldInflowSlice = block.template getData< Field_T >(ids_.velocityFieldInflowSlice);
-      WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(velocityFieldInflowSlice, Cell globalCell;
-         velocityFieldInflowSlice->get(x, y, z, 0) = 0.05;
-      )
-   }
-   gpu::fieldCpy< gpu::GPUField< typename Field_T::value_type >, Field_T >(blocks_, ids_.velocityFieldInflowSliceGPU, ids_.velocityFieldInflowSlice);
-
-   }
-
-   real_t getKarmanPaoEnergy(const real_t kappa){
-      const real_t a1 = alpha_ * std::pow(urms_, real_c(2.0)) / kappaVKP_;
-      const real_t a2 = std::pow(kappa / kappaVKP_, real_c(4.0));
-      const real_t a3 = std::pow(real_c(1.0) + std::pow(kappa / kappaVKP_, real_c(2.0)), real_c(17.0) / real_c(6.0));
-      const real_t a4 = std::exp(real_c(-2.0) * std::pow(kappa / kappaKolmogorov_, real_c(2.0)));
-      return a1 * a2 / a3 * a4;
-   }
-
-   Vector3<real_t> summOfFourrierModes(Vector3<real_t> p){
-
-      Vector3<real_t> result(real_c(0.0), real_c(0.0), real_c(0.0));
-
-      for(uint_t i = 0; i < n_; ++i){
-
-         const real_t a = math::realRandom(real_c(0.0), real_c(1.0));
-         const real_t theta = std::acos(real_c(2.0) * a - real_c(1.0));
-         const real_t phi = real_c(2.0) * math::pi * math::realRandom(real_c(0.0), real_c(1.0));
-         const real_t psi = real_c(2.0) * math::pi * math::realRandom(real_c(0.0), real_c(1.0));
-
-         const real_t kappaN = firstMode_ + real_c(0.5) * deltaKappa_ + real_c(i) * deltaKappa_;
-         
-         Vector3<real_t> kappaVector(std::cos(phi) * std::sin(theta) * kappaN,
-                                     std::sin(theta) * std::sin(phi) * kappaN,
-                                     std::cos(theta) * kappaN);
-
-         Vector3<real_t> zetta(math::realRandom(real_c(-1.0), real_c(1.0)),
-                               math::realRandom(real_c(-1.0), real_c(1.0)),
-                               math::realRandom(real_c(-1.0), real_c(1.0)));
-         zetta = zetta.getNormalized();
-
-         Vector3<real_t> crossProduct = kappaVector%=zetta;
-         Vector3<real_t> sigma = crossProduct / crossProduct.getNormalized();
-
-         const real_t ut = std::sqrt(getKarmanPaoEnergy(kappaN) * deltaKappa_);
-         const real_t t = executionCounter_ * dt_;
-
-         const Vector3<real_t> x = p - t*uc_;
-         const real_t arg = std::cos(kappaVector * x + psi);
-
-         result += real_c(2.0) * ut * arg * sigma;
-      }
-
-      return result;
-   }
-
-}; // class Inflow
-
 int main(int argc, char** argv){
    Environment env( argc, argv );
    mpi::MPIManager::instance()->useWorldComm();
@@ -317,7 +181,7 @@ int main(int argc, char** argv){
    ids.densityFieldGPU  = gpu::addGPUFieldToStorage< ScalarField_T >(blocks, ids.densityField, "velocity on GPU", true);
 
    std::function< Vector3< uint_t > (const shared_ptr< StructuredBlockStorage >&, IBlock * const) > getInflowSliceSize = inflowSliceSize;
-   ids.velocityFieldInflowSlice    = field::addToStorage< VelocityField_T >(blocks, "velocity Inflow", getInflowSliceSize, macroFieldType(0.0), field::fzyx, uint_c(1));
+   ids.velocityFieldInflowSlice    = field::addToStorage< VelocityField_T >(blocks, "velocity Inflow", getInflowSliceSize, macroFieldType(0.0), field::fzyx, uint_c(0));
    ids.velocityFieldInflowSliceGPU = gpu::addGPUFieldToStorage< VelocityField_T >(blocks, ids.velocityFieldInflowSlice, "velocity Inflow on GPU", true);
 
    const Cell innerOuterSplit = Cell(parameters.getParameter< Vector3<cell_idx_t> >("innerOuterSplit", Vector3<cell_idx_t>(1, 1, 1)));
@@ -329,7 +193,31 @@ int main(int argc, char** argv){
    }
    sweepCollection.initialiseBlockPointer();
 
-   Inflow<VelocityField_T> inflow(blocks, setup, ids);
+   auto inflow = std::make_shared<Inflow>(blocks, setup, ids);
+
+   const bool cpu = false; 
+   if(cpu){
+      WALBERLA_LOG_INFO_ON_ROOT("CPU Version")
+      inflow->updateCPU();
+   }
+   else{
+      WALBERLA_LOG_INFO_ON_ROOT("GPU Version")
+      inflow->updateGPU();
+      gpu::fieldCpy< VelocityField_T, gpu::GPUField< VelocityField_T::value_type > >(blocks, ids.velocityFieldInflowSlice, ids.velocityFieldInflowSliceGPU);
+   }
+
+
+   for (auto& block : *blocks){
+      auto velocityFieldInflowSlice = block.getData< VelocityField_T >(ids.velocityFieldInflowSlice);
+
+      WALBERLA_LOG_DEVEL_VAR(velocityFieldInflowSlice->get(0,20,20,0))
+      WALBERLA_LOG_DEVEL_VAR(velocityFieldInflowSlice->get(0,20,20,1))
+      WALBERLA_LOG_DEVEL_VAR(velocityFieldInflowSlice->get(0,20,20,2))
+   }
+
+   return EXIT_SUCCESS;
+
+
    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    ///                                      LB SWEEPS AND BOUNDARY HANDLING                                       ///
    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -350,7 +238,7 @@ int main(int argc, char** argv){
       initGrid(blocks, ids.flagField, setup);
    }
    
-   tpms.setupBoundary(blocks, ids.flagField);
+   // tpms.setupBoundary(blocks, ids.flagField);
    geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, ids.flagField, setup.fluidUID);
    // BoundaryCollection_T boundaryCollection(blocks, ids.flagField, ids.pdfFieldGPU, fluidFlagUID, latticeVelocity);
    BoundaryCollection_T boundaryCollection(blocks, ids.flagField, ids.pdfFieldGPU, setup.fluidUID, ids.velocityFieldInflowSliceGPU, ids.pdfField);
@@ -378,7 +266,7 @@ int main(int argc, char** argv){
    auto defaultStream = gpu::StreamRAII::newPriorityStream(streamLowPriority);
    SweepTimeloop timeloop(blocks->getBlockStorage(), setup.timesteps);
 
-   timeloop.addFuncBeforeTimeStep(inflow, "Update inflow velocity");
+   timeloop.addFuncBeforeTimeStep(inflow->updateGPUFunctor(), "Update inflow velocity");
 
    lbm_generated::BasicRecursiveTimeStepGPU< GPUPdfField_T, SweepCollection_T, BoundaryCollection_T >
       LBMMeshRefinement(blocks, ids.pdfFieldGPU, sweepCollection, boundaryCollection, nonUniformCommunication, nonUniformPackInfo);
diff --git a/apps/showcases/PorousMediaGPU/PorousMediaGPU.prm b/apps/showcases/PorousMediaGPU/PorousMediaGPU.prm
index 877f46b3d..f3243cf9f 100644
--- a/apps/showcases/PorousMediaGPU/PorousMediaGPU.prm
+++ b/apps/showcases/PorousMediaGPU/PorousMediaGPU.prm
@@ -7,7 +7,7 @@ Parameters
     latticeVelocity 0.05;
     initialiseWithInletVelocity true;
 
-    timesteps 11;
+    timesteps 1;
 
     processMemoryLimit 512; // MiB
     innerOuterSplit <1, 1, 1>;
@@ -19,10 +19,10 @@ Parameters
 Turbulence
 {
     useGrid false;
-    U_rms  0.1;
-    characteristicLength 0.3333333333333333;
-    numberOfFourrierModes 5;
-    U_c < 1.0, 0.0, 0.0 >;
+    U_rms  0.05;
+    characteristicLength 0.4;
+    numberOfFourrierModes 100;
+    U_c < 0.05, 0.0, 0.0 >;
 }
 
 TPMS
@@ -40,7 +40,7 @@ DomainSetup
 {
     domainSize    < 7, 1, 1 >;
     // cellsPerBlock < 1344, 192, 192 >;
-    cellsPerBlock < 168, 24, 24 >;
+    cellsPerBlock < 336, 48, 48 >;
     blocks    < 1, 1, 1 >;
     periodic    < false, true, true >;
     refinementLevels 0;
@@ -65,10 +65,10 @@ StabilityChecker
 
 VTKWriter
 {
-    vtkWriteFrequency 5;
+    vtkWriteFrequency 0;
     vtkGhostLayers 0;
     velocity true;
-    writeVelocityAsMagnitude true;
+    writeVelocityAsMagnitude false;
     density false;
     flag true;
     writeOnlySlice false;
@@ -86,7 +86,7 @@ Logging
     writeSetupForestAndReturn false; // When only one process is used the decomposition is writen and the program terminates
     readSetupFromFile false;
     remainingTimeLoggerFrequency 20; // in seconds
-    writeSurfaceMeshOfTPMS true;
+    writeSurfaceMeshOfTPMS false;
 }
 
 Evaluation
diff --git a/apps/showcases/PorousMediaGPU/util.cpp b/apps/showcases/PorousMediaGPU/util.cpp
new file mode 100644
index 000000000..90d79836a
--- /dev/null
+++ b/apps/showcases/PorousMediaGPU/util.cpp
@@ -0,0 +1,92 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file util.cpp
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+#include "util.h"
+
+namespace walberla
+{
+
+Vector3<real_t> summOfFourrierModes(const Vector3<real_t>& p, const Vector3<real_t>& uc,
+                                    const std::vector<real_t>& kappa, const std::vector<real_t>& ut, const real_t t){
+
+    Vector3<real_t> result(real_c(0.0), real_c(0.0), real_c(0.0));
+    const real_t pi2 = real_c(2.0) * math::pi;
+    const uint_t n = uint_c(kappa.size());
+
+    for(uint_t i = 0; i < n; ++i){
+        const real_t a     = 0.1; // math::realRandom(real_c(0.0), real_c(1.0));
+        const real_t phi   = 0.2; //math::realRandom(real_c(0.0), real_c(1.0)) * pi2;
+        const real_t psi   = 0.3; //math::realRandom(real_c(0.0), real_c(1.0)) * pi2;
+        const real_t zetaX = -0.4; //math::realRandom(real_c(-1.0), real_c(1.0));
+        const real_t zetaY = -0.6; //math::realRandom(real_c(-1.0), real_c(1.0));
+        const real_t zetaZ = 0.8; // math::realRandom(real_c(-1.0), real_c(1.0));
+
+        const real_t theta = std::acos(real_c(2.0) * a - real_c(1.0));
+        
+        const real_t kappaN = kappa[i];
+        const real_t kappaX = std::cos(phi)   * std::sin(theta) * kappaN;
+        const real_t kappaY = std::sin(theta) * std::sin(phi)   * kappaN;
+        const real_t kappaZ = std::cos(theta) * kappaN;
+        // WALBERLA_LOG_INFO_ON_ROOT("Kappa: [" << kappaX << ", " << kappaY << ", " << kappaZ << "]")
+
+        const real_t zettaLength = std::sqrt(zetaX*zetaX + zetaY*zetaY + zetaZ*zetaZ);
+        // WALBERLA_LOG_DEVEL_VAR(zettaLength)
+        
+        const real_t zetaXNormalized = zetaX / zettaLength;
+        const real_t zetaYNormalized = zetaY / zettaLength;
+        const real_t zetaZNormalized = zetaZ / zettaLength;
+
+        // WALBERLA_LOG_INFO_ON_ROOT("zeta: [" << zetaXNormalized << ", " << zetaYNormalized << ", " << zetaZNormalized << "]")
+        
+
+        const real_t sigmaX = kappaY * zetaZNormalized - kappaZ * zetaYNormalized;
+        const real_t sigmaY = kappaZ * zetaXNormalized - kappaX * zetaZNormalized;
+        const real_t sigmaZ = kappaX * zetaYNormalized - kappaY * zetaXNormalized;
+
+        const real_t sigmaLength = std::sqrt(sigmaX*sigmaX + sigmaY*sigmaY + sigmaZ*sigmaZ);
+        // WALBERLA_LOG_DEVEL_VAR(sigmaLength)
+
+        const real_t sigmaXNormalized = sigmaX / sigmaLength;
+        const real_t sigmaYNormalized = sigmaY / sigmaLength;
+        const real_t sigmaZNormalized = sigmaZ / sigmaLength;
+
+        // WALBERLA_LOG_INFO_ON_ROOT("Sigma: [" << sigmaXNormalized << ", " << sigmaYNormalized << ", " << sigmaZNormalized << "]")
+
+        const real_t pX = p[0] - (t * uc[0]);
+        const real_t pY = p[1] - (t * uc[1]);
+        const real_t pZ = p[2] - (t * uc[2]);
+
+        const real_t arg =  real_c(2.0) * ut[i] * std::cos(kappaX * pX + kappaY * pY + kappaZ * pZ + psi);
+
+        result[0] += arg * sigmaXNormalized;
+        result[1] += arg * sigmaYNormalized;
+        result[2] += arg * sigmaZNormalized;
+    }
+    return result;
+}
+
+real_t getKarmanPaoEnergy(const real_t k, const real_t alpha, const real_t kappaVKP, const real_t kappaKolmogorov, const real_t uRMS){
+    const real_t a1 = alpha * std::pow(uRMS, real_c(2.0)) / kappaVKP;
+    const real_t a2 = std::pow(k / kappaVKP, real_c(4.0));
+    const real_t a3 = std::pow(real_c(1.0) + std::pow(k / kappaVKP, real_c(2.0)), real_c(17.0) / real_c(6.0));
+    const real_t a4 = std::exp(real_c(-2.0) * std::pow(k / kappaKolmogorov, real_c(2.0)));
+    return a1 * a2 / a3 * a4;
+}
+
+} // namespace walberla
\ No newline at end of file
diff --git a/apps/showcases/PorousMediaGPU/util.h b/apps/showcases/PorousMediaGPU/util.h
new file mode 100644
index 000000000..55750c8e4
--- /dev/null
+++ b/apps/showcases/PorousMediaGPU/util.h
@@ -0,0 +1,33 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file util.h
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+#pragma once
+
+#include "core/DataTypes.h"
+#include "core/math/Constants.h"
+#include "core/math/Vector3.h"
+#include "core/math/Random.h"
+
+namespace walberla{
+Vector3<real_t> summOfFourrierModes(const Vector3<real_t>& p, const Vector3<real_t>& uc,
+                                    const std::vector<real_t>& kappa, const std::vector<real_t>& ut, const real_t t);
+
+real_t getKarmanPaoEnergy(const real_t k, const real_t alpha, const real_t kappaVKP, const real_t kappaKolmogorov, const real_t uRMS);
+
+} // namespace walberla
\ No newline at end of file
diff --git a/apps/showcases/PorousMediaGPU/utilGPU.cu b/apps/showcases/PorousMediaGPU/utilGPU.cu
new file mode 100644
index 000000000..7cb5912b7
--- /dev/null
+++ b/apps/showcases/PorousMediaGPU/utilGPU.cu
@@ -0,0 +1,171 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file util.cu
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+#include "utilGPU.h"
+
+namespace walberla
+{
+
+namespace internal{
+
+static __device__ void singleMode(double* rX, double* rY, double* rZ, const double kappaN, const double ut,
+                                  const double pX, const double pY, const double pZ)
+{
+
+   /*
+   curandState state;
+   curand_init(42, xStride*x + yStride*y + zStride*z, 0, &state);
+
+   const real_t a   = curand_uniform(&state);
+   const real_t phi = curand_uniform(&state) * pi2;
+   const real_t psi = curand_uniform(&state) * pi2;
+   const real_t zetaX = real_t(2.0) * curand_uniform(&state) - real_t(1.0);
+   const real_t zetaY = real_t(2.0) * curand_uniform(&state) - real_t(1.0);
+   const real_t zetaZ = real_t(2.0) * curand_uniform(&state) - real_t(1.0);
+   */
+
+   const real_t a     = 0.1; // math::realRandom(real_c(0.0), real_c(1.0));
+   const real_t phi   = 0.2; //math::realRandom(real_c(0.0), real_c(1.0)) * pi2;
+   const real_t psi   = 0.3; //math::realRandom(real_c(0.0), real_c(1.0)) * pi2;
+   const real_t zetaX = -0.4; //math::realRandom(real_c(-1.0), real_c(1.0));
+   const real_t zetaY = -0.6; //math::realRandom(real_c(-1.0), real_c(1.0));
+   const real_t zetaZ = 0.8; // math::realRandom(real_c(-1.0), real_c(1.0));
+
+   const real_t theta = acos(real_t(2.0) * a - real_t(1.0));
+
+   const real_t kappaX = cos(phi) * sin(theta) * kappaN;
+   const real_t kappaY = sin(theta) * sin(phi) * kappaN;
+   const real_t kappaZ = cos(theta) * kappaN;
+
+   const real_t zettaLength = sqrt(zetaX*zetaX + zetaY*zetaY + zetaZ*zetaZ);
+   
+   const real_t zetaXNormalized = zetaX / zettaLength;
+   const real_t zetaYNormalized = zetaY / zettaLength;
+   const real_t zetaZNormalized = zetaZ / zettaLength;
+
+   const real_t sigmaX = kappaY * zetaZNormalized - kappaZ * zetaYNormalized;
+   const real_t sigmaY = kappaZ * zetaXNormalized - kappaX * zetaZNormalized;
+   const real_t sigmaZ = kappaX * zetaYNormalized - kappaY * zetaXNormalized;
+
+   const real_t sigmaLength = sqrt(sigmaX*sigmaX + sigmaY*sigmaY + sigmaZ*sigmaZ);
+
+   const real_t sigmaXNormalized = sigmaX / sigmaLength;
+   const real_t sigmaYNormalized = sigmaY / sigmaLength;
+   const real_t sigmaZNormalized = sigmaZ / sigmaLength;
+
+   const real_t arg =  real_t(2.0) * ut * cos(kappaX * pX + kappaY * pY + kappaZ * pZ + psi);
+
+   *rX = arg * sigmaXNormalized;
+   *rY = arg * sigmaYNormalized;
+   *rZ = arg * sigmaZNormalized;
+   
+}
+
+
+static __global__ void summOfFourrierModesKernel(double * velocity, double * const kappa, double * const ut,
+                                                 const real_t yMin, const real_t zMin, const real_t minCornerY, const real_t minCornerZ,
+                                                 const real_t dx, const real_t ucX, const real_t ucY, const real_t ucZ,
+                                                 const real_t t, const uint_t n, const real_t latticeVelocity,
+                                                 const int64_t xSize, const int64_t ySize, const int64_t zSize,
+                                                 const int64_t xStride, const int64_t yStride, const int64_t zStride, const int64_t fStride)
+{
+   if(blockDim.x*blockIdx.x + threadIdx.x < xSize && blockDim.y*blockIdx.y + threadIdx.y < ySize && blockDim.z*blockIdx.z + threadIdx.z < zSize)
+   {
+    const int64_t x = 0; // blockDim.x*blockIdx.x + threadIdx.x;
+    const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+    const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+     
+
+    real_t resultX = 0.0;
+    real_t resultY = 0.0;
+    real_t resultZ = 0.0;
+
+    const real_t xCenter = real_t(0.0);
+    const real_t yCenter = (yMin + ( real_t( y ) + real_t(0.5) ) * dx) + minCornerY;
+    const real_t zCenter = (zMin + ( real_t( z ) + real_t(0.5) ) * dx) + minCornerZ;
+    const real_t pi2 = real_t(2.0) * real_t(3.14159265358979323846);
+
+    const real_t pX = xCenter - (t * ucX);
+    const real_t pY = yCenter - (t * ucY);
+    const real_t pZ = zCenter - (t * ucZ);
+
+    for(int64_t i = 0; i < n; ++i){
+
+      double rX, rY, rZ;
+      singleMode(&rX, &rY, &rZ, kappa[i], ut[i], pX, pY, pZ);
+
+      resultX += rX;
+      resultY += rY;
+      resultZ += rZ;
+    }
+
+        
+    velocity[xStride*x + yStride*y + zStride*z + 0*fStride] = resultX;
+    velocity[xStride*x + yStride*y + zStride*z + 1*fStride] = resultY;
+    velocity[xStride*x + yStride*y + zStride*z + 2*fStride] = resultZ;
+
+   }
+   
+}
+}
+
+
+void getBlockAndGridSize(Vector3<uint_t> s, Vector3<uint32_t>& block, Vector3<uint32_t>& grid){
+   
+   const int64_t b0 = 128;
+   const int64_t b1 = 1;
+   const int64_t b2 = 1;
+
+   const int64_t x0 = int64_c(s[0]);
+   const int64_t x1 = int64_c(s[1]);
+   const int64_t x2 = int64_c(s[2]);
+
+   block[0] = uint32_c((1024 < ((b0 < x0) ? b0 : x0)) ? 1024 : ((b0 < x0) ? b0 : x0));
+   block[1] = uint32_c((1024 < ((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0))))) ? 1024 : ((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))));
+   block[2] = uint32_c((64 < ((x2 < b2*((int64_t)(b0*b1) / (int64_t)(((b0 < x0) ? b0 : x0)*((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0))))))) ? x2 : b2*((int64_t)(b0*b1) / (int64_t)(((b0 < x0) ? b0 : x0)*((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))))))) ? 64 : ((x2 < b2*((int64_t)(b0*b1) / (int64_t)(((b0 < x0) ? b0 : x0)*((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0))))))) ? x2 : b2*((int64_t)(b0*b1) / (int64_t)(((b0 < x0) ? b0 : x0)*((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0))))))));
+   grid[0] =  uint32_c((x0) % (((1024 < ((b0 < x0) ? b0 : x0)) ? 1024 : ((b0 < x0) ? b0 : x0))) == 0 ? (int64_t)(x0) / (int64_t)(((1024 < ((b0 < x0) ? b0 : x0)) ? 1024 : ((b0 < x0) ? b0 : x0))) : ( (int64_t)(x0) / (int64_t)(((1024 < ((b0 < x0) ? b0 : x0)) ? 1024 : ((b0 < x0) ? b0 : x0))) ) +1 );
+   grid[1] =  uint32_c((x1) % (((1024 < ((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0))))) ? 1024 : ((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))))) == 0 ? (int64_t)(x1) / (int64_t)(((1024 < ((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0))))) ? 1024 : ((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))))) : ( (int64_t)(x1) / (int64_t)(((1024 < ((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0))))) ? 1024 : ((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))))) ) +1 );
+   grid[2] =  uint32_c((x2) % (((64 < ((x2 < b2*((int64_t)(b0*b1) / (int64_t)(((b0 < x0) ? b0 : x0)*((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0))))))) ? x2 : b2*((int64_t)(b0*b1) / (int64_t)(((b0 < x0) ? b0 : x0)*((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))))))) ? 64 : ((x2 < b2*((int64_t)(b0*b1) / (int64_t)(((b0 < x0) ? b0 : x0)*((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0))))))) ? x2 : b2*((int64_t)(b0*b1) / (int64_t)(((b0 < x0) ? b0 : x0)*((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0))))))))) == 0 ? (int64_t)(x2) / (int64_t)(((64 < ((x2 < b2*((int64_t)(b0*b1) / (int64_t)(((b0 < x0) ? b0 : x0)*((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0))))))) ? x2 : b2*((int64_t)(b0*b1) / (int64_t)(((b0 < x0) ? b0 : x0)*((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))))))) ? 64 : ((x2 < b2*((int64_t)(b0*b1) / (int64_t)(((b0 < x0) ? b0 : x0)*((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0))))))) ? x2 : b2*((int64_t)(b0*b1) / (int64_t)(((b0 < x0) ? b0 : x0)*((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0))))))))) : ( (int64_t)(x2) / (int64_t)(((64 < ((x2 < b2*((int64_t)(b0*b1) / (int64_t)(((b0 < x0) ? b0 : x0)*((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0))))))) ? x2 : b2*((int64_t)(b0*b1) / (int64_t)(((b0 < x0) ? b0 : x0)*((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))))))) ? 64 : ((x2 < b2*((int64_t)(b0*b1) / (int64_t)(((b0 < x0) ? b0 : x0)*((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0))))))) ? x2 : b2*((int64_t)(b0*b1) / (int64_t)(((b0 < x0) ? b0 : x0)*((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0))))))))) ) +1 );   
+}
+
+void updateVelocityPlane(gpu::GPUField<real_t>* velocityGPU, real_t* kappa, real_t* ut, const Vector3<real_t>& globalMinCorner, const Vector3<real_t>& blockMinCorner, const Vector3<real_t>& uc,
+                         const real_t dx, const real_t t, const uint_t n, const real_t latticeVelocity,
+                         const Vector3<uint32_t>& block, const Vector3<uint32_t>& grid){
+
+   dim3 block_(block[0], block[1], block[2]);
+   dim3 grid_(grid[0], grid[1], grid[2]);
+ 
+   double * data_velocity = velocityGPU->dataAt(0, 0, 0, 0);
+   const int64_t size_velocity_0 = int64_c(velocityGPU->xSize());
+   const int64_t size_velocity_1 = int64_c(velocityGPU->ySize());
+   const int64_t size_velocity_2 = int64_c(velocityGPU->zSize());
+   const int64_t stride_velocity_0 = int64_c(velocityGPU->xStride());
+   const int64_t stride_velocity_1 = int64_c(velocityGPU->yStride());
+   const int64_t stride_velocity_2 = int64_c(velocityGPU->zStride());
+   const int64_t stride_velocity_3 = int64_c(velocityGPU->fStride());
+   internal::summOfFourrierModesKernel<<<grid_, block_, 0, nullptr>>>(data_velocity, kappa, ut,
+                                                                      globalMinCorner[1], globalMinCorner[2], blockMinCorner[1], blockMinCorner[2],
+                                                                      dx, uc[0], uc[1], uc[2], t, n, latticeVelocity,
+                                                                      size_velocity_0, size_velocity_1, size_velocity_2,
+                                                                      stride_velocity_0, stride_velocity_1, stride_velocity_2, stride_velocity_3);
+
+}
+
+
+} // namespace walberla
\ No newline at end of file
diff --git a/apps/showcases/PorousMediaGPU/utilGPU.h b/apps/showcases/PorousMediaGPU/utilGPU.h
new file mode 100644
index 000000000..2e48ae449
--- /dev/null
+++ b/apps/showcases/PorousMediaGPU/utilGPU.h
@@ -0,0 +1,37 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file utilGPU.h
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+#pragma once
+
+#include "core/DataTypes.h"
+#include "core/math/Constants.h"
+
+#include "gpu/GPUWrapper.h"
+#include "gpu/ErrorChecking.h"
+#include "gpu/FieldCopy.h"
+#include "gpu/GPUField.h"
+
+namespace walberla{
+void getBlockAndGridSize(Vector3<uint_t> s, Vector3<uint32_t>& block, Vector3<uint32_t>& grid);
+
+void updateVelocityPlane(gpu::GPUField<real_t>* velocityGPU, real_t* kappa, real_t* ut, const Vector3<real_t>& globalMinCorner, const Vector3<real_t>& blockMinCorner, const Vector3<real_t>& uc,
+                         const real_t dx, const real_t t, const uint_t n, const real_t latticeVelocity,
+                         const Vector3<uint32_t>& block, const Vector3<uint32_t>& grid);
+
+} // namespace walberla
\ No newline at end of file
-- 
GitLab