From 7bd039fa50c1deeb957dee4d6fa596b6372d7f95 Mon Sep 17 00:00:00 2001
From: markus holzer <markus.holzer@fau.de>
Date: Sun, 8 Nov 2020 23:20:42 +0100
Subject: [PATCH] Latticemodel for GPU test case working

---
 .../templates/LatticeModel.tmpl.cpp           |  22 +-
 .../templates/LatticeModel.tmpl.h             |  12 --
 src/cuda/AddGPUFieldToStorage.h               |  17 ++
 src/cuda/AddGPUFieldToStorage.impl.h          | 204 +++++++++++++-----
 src/cuda/CMakeLists.txt                       |   2 +-
 src/cuda/GPUBlockDataHandling.h               | 104 +++++++++
 src/cuda/GPUField.h                           |   4 +-
 src/cuda/GPUPdfField.h                        |  79 +++++++
 src/field/GhostLayerField.h                   |   2 +-
 src/field/blockforest/BlockDataHandling.h     |   2 +-
 src/lbm/field/PdfField.h                      |   2 +-
 tests/cuda/CMakeLists.txt                     |   4 +-
 .../codegen/LbCodeGenerationExampleGPU.cpp    |  77 ++++---
 .../codegen/LbCodeGenerationExampleGPU.prm    |   2 +-
 .../codegen/LbCodeGenerationExampleGPU.py     |  33 ++-
 tests/lbm/codegen/LbCodeGenerationExample.cpp |  28 +--
 16 files changed, 467 insertions(+), 127 deletions(-)
 create mode 100644 src/cuda/GPUBlockDataHandling.h
 create mode 100644 src/cuda/GPUPdfField.h

diff --git a/python/lbmpy_walberla/templates/LatticeModel.tmpl.cpp b/python/lbmpy_walberla/templates/LatticeModel.tmpl.cpp
index 39360a408..a7c94a37f 100644
--- a/python/lbmpy_walberla/templates/LatticeModel.tmpl.cpp
+++ b/python/lbmpy_walberla/templates/LatticeModel.tmpl.cpp
@@ -20,7 +20,7 @@
 
 #include "core/DataTypes.h"
 #include "core/Macros.h"
-#include "lbm/field/PdfField.h"
+{%if target is equalto 'gpu'%}#include "cuda/GPUPdfField.h"{% else %}#include "lbm/field/PdfField.h"{% endif %}
 #include "lbm/sweeps/Streaming.h"
 #include "{{class_name}}.h"
 
@@ -65,13 +65,16 @@ void {{class_name}}::Sweep::streamCollide( IBlock * block, const uint_t numberOf
 {
    {{stream_collide_kernel|generate_block_data_to_field_extraction(parameters=['pdfs', 'pdfs_tmp'])|indent(4)}}
 
-   auto & lm = pdfs->latticeModel();
-   //auto & lm = dynamic_cast< lbm::PdfField<{{class_name}}> * > (pdfs)->latticeModel();
+   {%if target is equalto 'gpu'%}
+   auto & lm = static_cast< cuda::GPUPdfField<{{class_name}}> * > (pdfs)->latticeModel();
+   {% else %}
+   auto & lm = dynamic_cast< lbm::PdfField<{{class_name}}> * > (pdfs)->latticeModel();
+   {% endif %}
    WALBERLA_ASSERT_EQUAL( *(lm.blockId_), block->getId() );
 
    {{stream_collide_kernel|generate_refs_for_kernel_parameters(prefix='lm.', parameters_to_ignore=['pdfs', 'pdfs_tmp'])|indent(4) }}
    {%if target is equalto 'gpu'%}
-   {{stream_collide_kernel|generate_call('cell_idx_c(numberOfGhostLayersToInclude)', stream='stream')|indent(4)}}
+   {{stream_collide_kernel|generate_call(stream='stream')|indent(4)}}
    {% else %}
    {{stream_collide_kernel|generate_call('cell_idx_c(numberOfGhostLayersToInclude)')|indent(4)}}
    {% endif %}
@@ -83,13 +86,16 @@ void {{class_name}}::Sweep::collide( IBlock * block, const uint_t numberOfGhostL
 {
    {{collide_kernel|generate_block_data_to_field_extraction(parameters=['pdfs'])|indent(4)}}
 
-   auto & lm = pdfs->latticeModel();
-   //auto & lm = dynamic_cast< lbm::PdfField<{{class_name}}> * > (pdfs)->latticeModel();
+   {%if target is equalto 'gpu'%}
+   auto & lm = static_cast< cuda::GPUPdfField<{{class_name}}> * > (pdfs)->latticeModel();
+   {% else %}
+   auto & lm = dynamic_cast< lbm::PdfField<{{class_name}}> * > (pdfs)->latticeModel();
+   {% endif %}
    WALBERLA_ASSERT_EQUAL( *(lm.blockId_), block->getId() );
 
    {{collide_kernel|generate_refs_for_kernel_parameters(prefix='lm.', parameters_to_ignore=['pdfs', 'pdfs_tmp'])|indent(4) }}
    {%if target is equalto 'gpu'%}
-   {{collide_kernel|generate_call('cell_idx_c(numberOfGhostLayersToInclude)', stream='stream')|indent(4)}}
+   {{collide_kernel|generate_call(stream='stream')|indent(4)}}
    {% else %}
    {{collide_kernel|generate_call('cell_idx_c(numberOfGhostLayersToInclude)')|indent(4)}}
    {% endif %}
@@ -101,7 +107,7 @@ void {{class_name}}::Sweep::stream( IBlock * block, const uint_t numberOfGhostLa
    {{stream_kernel|generate_block_data_to_field_extraction(parameters=['pdfs', 'pdfs_tmp'])|indent(4)}}
 
    {%if target is equalto 'gpu'%}
-   {{stream_kernel|generate_call('cell_idx_c(numberOfGhostLayersToInclude)', stream='stream')|indent(4)}}
+   {{stream_kernel|generate_call(stream='stream')|indent(4)}}
    {% else %}
    {{stream_kernel|generate_call('cell_idx_c(numberOfGhostLayersToInclude)')|indent(4)}}
    {% endif %}
diff --git a/python/lbmpy_walberla/templates/LatticeModel.tmpl.h b/python/lbmpy_walberla/templates/LatticeModel.tmpl.h
index 1b1aacc33..be81e2696 100644
--- a/python/lbmpy_walberla/templates/LatticeModel.tmpl.h
+++ b/python/lbmpy_walberla/templates/LatticeModel.tmpl.h
@@ -345,9 +345,6 @@ struct Equilibrium< {{class_name}}, void >
    static void set( FieldPtrOrIterator & it,
                     const Vector3< real_t > & u = Vector3< real_t >( real_t(0.0) ), real_t rho = real_t(1.0) )
    {
-      {%if target is equalto 'gpu'%}
-      WALBERLA_ABORT("Equilibrium setter Not implemented for target GPU");
-      {% else %}
       {%if not compressible %}
       rho -= real_t(1.0);
       {%endif %}
@@ -355,16 +352,12 @@ struct Equilibrium< {{class_name}}, void >
       {% for eqTerm in equilibrium -%}
       it[{{loop.index0 }}] = {{eqTerm}};
       {% endfor -%}
-      {% endif %}
    }
 
    template< typename PdfField_T >
    static void set( PdfField_T & pdf, const cell_idx_t x, const cell_idx_t y, const cell_idx_t z,
                     const Vector3< real_t > & u = Vector3< real_t >( real_t(0.0) ), real_t rho = real_t(1.0) )
    {
-      {%if target is equalto 'gpu'%}
-      WALBERLA_ABORT("Equilibrium setter Not implemented for target GPU");
-      {% else %}
       {%if not compressible %}
       rho -= real_t(1.0);
       {%endif %}
@@ -373,7 +366,6 @@ struct Equilibrium< {{class_name}}, void >
       {% for eqTerm in equilibrium -%}
       pdf.getF( &xyz0, {{loop.index0 }})= {{eqTerm}};
       {% endfor -%}
-      {% endif %}
    }
 };
 
@@ -463,9 +455,6 @@ struct DensityAndVelocityRange<{{class_name}}, FieldIteratorXYZ>
    static void set( FieldIteratorXYZ & begin, const FieldIteratorXYZ & end, const {{class_name}} & lm,
                     const Vector3< real_t > & u = Vector3< real_t >( real_t(0.0) ), const real_t rho_in = real_t(1.0) )
    {
-         {%if target is equalto 'gpu'%}
-         WALBERLA_ABORT("DensityAndVelocityRange Not implemented for target GPU");
-         {% else %}
         for( auto cellIt = begin; cellIt != end; ++cellIt )
         {
             const auto x = cellIt.x();
@@ -478,7 +467,6 @@ struct DensityAndVelocityRange<{{class_name}}, FieldIteratorXYZ>
 
             Equilibrium<{{class_name}}>::set(cellIt, Vector3<real_t>(u_0, u_1, u_2), rho{%if not compressible %} + real_t(1) {%endif%});
         }
-        {% endif %}
    }
 };
 
diff --git a/src/cuda/AddGPUFieldToStorage.h b/src/cuda/AddGPUFieldToStorage.h
index 3803ff8c8..65734439a 100644
--- a/src/cuda/AddGPUFieldToStorage.h
+++ b/src/cuda/AddGPUFieldToStorage.h
@@ -65,6 +65,23 @@ namespace cuda {
 
 
 
+   //*******************************************************************************************************************
+   /*! Adds a cuda::GPUPdfField to a StructuredBlockStorage using data from a CPU PDF field
+   *
+   *  - adds a GPU PDF field to a StructuredBlockStorage using a CPU PDF field
+   *  - acts similar to addGPUFieldToStorage but needs a latticeModel for GPU usage
+   *  @tparam Field_T  type of the CPU field, the created GPUField will be of type cuda::GPUField<Field_T::value_type>
+   */
+   //*******************************************************************************************************************
+   template< typename Field_T, typename LatticeModel_T>
+   BlockDataID addGPUPdfFieldToStorage( const shared_ptr< StructuredBlockStorage > & bs,
+                                        ConstBlockDataID cpuFieldID,
+                                        const LatticeModel_T & _latticeModel,
+                                        const std::string & identifier,
+                                        bool usePitchedMem = true );
+
+
+
 } // namespace cuda
 } // namespace walberla
 
diff --git a/src/cuda/AddGPUFieldToStorage.impl.h b/src/cuda/AddGPUFieldToStorage.impl.h
index 1befc3e81..1dff45b1c 100644
--- a/src/cuda/AddGPUFieldToStorage.impl.h
+++ b/src/cuda/AddGPUFieldToStorage.impl.h
@@ -22,75 +22,173 @@
 #pragma once
 
 #include "cuda/FieldCopy.h"
+#include "cuda/GPUPdfField.h"
+#include "cuda/GPUBlockDataHandling.h"
+#include "lbm/field/PdfField.h"
+
+namespace walberla
+{
+namespace cuda
+{
+namespace internal
+{
+template< typename GPUField_T >
+GPUField_T* createGPUField(const IBlock* const block, const StructuredBlockStorage* const bs, uint_t ghostLayers,
+                           uint_t fSize, const field::Layout& layout, bool usePitchedMem)
+{
+   return new GPUField_T(bs->getNumberOfXCells(*block), bs->getNumberOfYCells(*block), bs->getNumberOfZCells(*block),
+                         fSize, ghostLayers, layout, usePitchedMem);
+}
+
+template< typename Field_T >
+GPUField< typename Field_T::value_type >* createGPUFieldFromCPUField(const IBlock* const block,
+                                                                     const StructuredBlockStorage* const,
+                                                                     ConstBlockDataID cpuFieldID, bool usePitchedMem)
+{
+   typedef GPUField< typename Field_T::value_type > GPUField_T;
+
+   const Field_T* f = block->getData< Field_T >(cpuFieldID);
+   auto gpuField =
+      new GPUField_T(f->xSize(), f->ySize(), f->zSize(), f->fSize(), f->nrOfGhostLayers(), f->layout(), usePitchedMem);
+
+   cuda::fieldCpy(*gpuField, *f);
+
+   return gpuField;
+}
+
+template<typename PdfField_T, typename LatticeModel_T >
+class GPUPdfFieldHandling
+   : public cuda::GPUBlockDataHandling< GPUPdfField<LatticeModel_T>, LatticeModel_T::Stencil::D == 2 >
+{
+ public:
+   typedef GPUPdfField< LatticeModel_T > GPUPdfField_T;
+   // typedef field::BlockDataHandling< GPUPdfField_T, LatticeModel_T::Stencil::D == 2 > Base_T;
+
+   GPUPdfFieldHandling(const weak_ptr< StructuredBlockStorage >& blocks, ConstBlockDataID cpu_fieldID,
+                       const LatticeModel_T& latticeModel, bool usePitchedMem)
+      : blocks_(blocks), cpu_fieldID_(cpu_fieldID), latticeModel_(latticeModel), usePitchedMem_(usePitchedMem)
+   {}
+
+   inline void serialize(IBlock* const block, const BlockDataID& id, mpi::SendBuffer& buffer)
+   {
+      packLatticeModel(block, id, buffer);
+   }
 
-namespace walberla {
-namespace cuda {
+   void serializeCoarseToFine(Block* const block, const BlockDataID& id, mpi::SendBuffer& buffer, const uint_t child)
+   {
+      packLatticeModel(block, id, buffer);
+   }
 
+   void serializeFineToCoarse(Block* const block, const BlockDataID& id, mpi::SendBuffer& buffer)
+   {
+      packLatticeModel(block, id, buffer);
+   }
 
-   namespace internal
+   void deserialize(IBlock* const block, const BlockDataID& id, mpi::RecvBuffer& buffer)
    {
-      template< typename GPUField_T>
-      GPUField_T * createGPUField( const IBlock * const block,
-                                   const StructuredBlockStorage * const bs,
-                                   uint_t ghostLayers,
-                                   uint_t fSize,
-                                   const field::Layout & layout,
-                                   bool usePitchedMem )
-      {
-         return new GPUField_T( bs->getNumberOfXCells( *block ),
-                                bs->getNumberOfYCells( *block ),
-                                bs->getNumberOfZCells( *block ),
-                                fSize, ghostLayers, layout, usePitchedMem );
-      }
-
-      template< typename Field_T>
-      GPUField< typename Field_T::value_type> *
-      createGPUFieldFromCPUField( const IBlock * const block,
-                                  const StructuredBlockStorage * const,
-                                  ConstBlockDataID cpuFieldID,
-                                  bool usePitchedMem
-                                )
-      {
-         typedef GPUField< typename Field_T::value_type> GPUField_T;
-
-         const Field_T * f = block->getData<Field_T>( cpuFieldID );
-         auto gpuField = new GPUField_T( f->xSize(), f->ySize(), f->zSize(), f->fSize(),
-                                         f->nrOfGhostLayers(), f->layout(), usePitchedMem );
-
-         cuda::fieldCpy( *gpuField, *f );
-
-         return gpuField;
-      }
+      unpackLatticeModel(block, id, buffer);
+   }
 
+   void deserializeCoarseToFine(Block* const block, const BlockDataID& id, mpi::RecvBuffer& buffer)
+   {
+      unpackLatticeModel(block, id, buffer);
+   }
+
+   void deserializeFineToCoarse(Block* const block, const BlockDataID& id, mpi::RecvBuffer& buffer, const uint_t child)
+   {
+      unpackLatticeModel(block, id, buffer);
    }
 
+ protected:
+   GPUPdfField_T* allocate(IBlock* const block)
+   {
+      return allocateDispatch(block, cpu_fieldID_, usePitchedMem_);
+   }
 
-   template< typename GPUField_T>
-   BlockDataID addGPUFieldToStorage(const shared_ptr< StructuredBlockStorage >& bs,
-                                    const std::string & identifier,
-                                    uint_t fSize,
-                                    const Layout layout,
-                                    uint_t nrOfGhostLayers,
-                                    bool usePitchedMem )
+   GPUPdfField_T* reallocate(IBlock* const block)
    {
-      auto func = std::bind ( internal::createGPUField<GPUField_T>, std::placeholders::_1, std::placeholders::_2, nrOfGhostLayers, fSize, layout, usePitchedMem );
-      return bs->addStructuredBlockData< GPUField_T >( func, identifier );
+#ifdef NDEBUG
+      return allocateDispatch(block, cpu_fieldID_, usePitchedMem_);
+#else
+      return allocateDispatch(block, cpu_fieldID_, usePitchedMem_);
+#endif
    }
 
+ private:
+   void packLatticeModel(IBlock* const block, const BlockDataID& id, mpi::SendBuffer& buffer) const
+   {
+      const GPUPdfField_T * field = block->template getData< GPUPdfField_T >(id);
+      WALBERLA_CHECK_NOT_NULLPTR(field);
+      buffer << field->latticeModel();
+   }
 
-   template< typename Field_T>
-   BlockDataID addGPUFieldToStorage( const shared_ptr< StructuredBlockStorage > & bs,
-                                     ConstBlockDataID cpuFieldID,
-                                     const std::string & identifier,
-                                     bool usePitchedMem )
+   void unpackLatticeModel(IBlock* const block, const BlockDataID& id, mpi::RecvBuffer& buffer) const
    {
-      auto func = std::bind ( internal::createGPUFieldFromCPUField<Field_T>, std::placeholders::_1, std::placeholders::_2, cpuFieldID, usePitchedMem );
-      return bs->addStructuredBlockData< GPUField<typename Field_T::value_type> >( func, identifier );
+      GPUPdfField_T * field = block->template getData< GPUPdfField_T >(id);
+      WALBERLA_CHECK_NOT_NULLPTR(field);
+
+      LatticeModel_T latticeModel = field->latticeModel();
+      buffer >> latticeModel;
+
+      auto blocks = blocks_.lock();
+      WALBERLA_CHECK_NOT_NULLPTR(blocks);
+
+      latticeModel.configure(*block, *blocks);
+      field->resetLatticeModel(latticeModel);
    }
 
+   GPUPdfField_T* allocateDispatch(IBlock* const block, ConstBlockDataID cpu_fieldID, bool usePitchedMem)
+   {
+      WALBERLA_ASSERT_NOT_NULLPTR( block );
 
+      auto blocks = blocks_.lock();
+      WALBERLA_CHECK_NOT_NULLPTR( blocks );
+      const PdfField_T * f = block->template getData< PdfField_T >(cpu_fieldID);
 
-} // namespace cuda
-} // namespace walberla
+      LatticeModel_T latticeModel = f->latticeModel();
+      latticeModel_ = latticeModel;
+
+      auto gpuField = new GPUPdfField_T(f->xSize(), f->ySize(), f->zSize(), f->fSize(), latticeModel,
+                                        f->nrOfGhostLayers(), f->layout(), usePitchedMem);
+      cuda::fieldCpy(*gpuField, *f);
+
+      return gpuField;
+   }
 
+   weak_ptr< StructuredBlockStorage > blocks_;
+
+   ConstBlockDataID cpu_fieldID_;
+   LatticeModel_T latticeModel_;
+   bool usePitchedMem_;
+
+}; // class PdfFieldHandling
+} // namespace internal
+
+template< typename GPUField_T >
+BlockDataID addGPUFieldToStorage(const shared_ptr< StructuredBlockStorage >& bs, const std::string& identifier,
+                                 uint_t fSize, const Layout layout, uint_t nrOfGhostLayers, bool usePitchedMem)
+{
+   auto func = std::bind(internal::createGPUField< GPUField_T >, std::placeholders::_1, std::placeholders::_2,
+                         nrOfGhostLayers, fSize, layout, usePitchedMem);
+   return bs->addStructuredBlockData< GPUField_T >(func, identifier);
+}
+
+template< typename Field_T >
+BlockDataID addGPUFieldToStorage(const shared_ptr< StructuredBlockStorage >& bs, ConstBlockDataID cpuFieldID,
+                                 const std::string& identifier, bool usePitchedMem)
+{
+   auto func = std::bind(internal::createGPUFieldFromCPUField< Field_T >, std::placeholders::_1, std::placeholders::_2,
+                         cpuFieldID, usePitchedMem);
+   return bs->addStructuredBlockData< GPUField< typename Field_T::value_type > >(func, identifier);
+}
+
+template< typename PdfField_T, typename LatticeModel_T, typename BlockStorage_T >
+BlockDataID addGPUPdfFieldToStorage(const shared_ptr< BlockStorage_T >& bs, ConstBlockDataID cpu_fieldID,
+                                    const LatticeModel_T& _latticeModel, bool usePitchedMem, const std::string& identifier)
+{
+   return bs->addBlockData(make_shared< internal::GPUPdfFieldHandling<PdfField_T, LatticeModel_T > >(
+                                  bs, cpu_fieldID, _latticeModel, usePitchedMem), identifier);
+}
 
+} // namespace cuda
+} // namespace walberla
diff --git a/src/cuda/CMakeLists.txt b/src/cuda/CMakeLists.txt
index 98aa991f0..b5174a3e5 100644
--- a/src/cuda/CMakeLists.txt
+++ b/src/cuda/CMakeLists.txt
@@ -4,7 +4,7 @@
 #
 ###################################################################################################
 
-waLBerla_add_module( DEPENDS blockforest core communication domain_decomposition executiontree python_coupling field stencil
+waLBerla_add_module( DEPENDS blockforest core communication domain_decomposition executiontree python_coupling field stencil lbm
                      BUILD_ONLY_IF_FOUND CUDA )
 
 ###################################################################################################
\ No newline at end of file
diff --git a/src/cuda/GPUBlockDataHandling.h b/src/cuda/GPUBlockDataHandling.h
new file mode 100644
index 000000000..6b34d2434
--- /dev/null
+++ b/src/cuda/GPUBlockDataHandling.h
@@ -0,0 +1,104 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file GPUBlockDataHandling.h
+//! \ingroup cuda
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "blockforest/BlockDataHandling.h"
+#include "blockforest/StructuredBlockForest.h"
+#include "core/debug/CheckFunctions.h"
+#include "core/math/Vector2.h"
+#include "core/math/Vector3.h"
+#include "field/FlagField.h"
+
+#include <type_traits>
+
+
+namespace walberla {
+namespace cuda {
+
+
+
+// still virtual, one must implement protected member functions 'allocate' and 'reallocate'
+template< typename Field_T, bool Pseudo2D = false >
+class GPUBlockDataHandling : public blockforest::BlockDataHandling< Field_T >
+{
+public:
+
+   typedef typename Field_T::value_type Value_T;
+   typedef std::function< void ( Field_T * field, IBlock * const block ) > InitializationFunction_T;
+
+   virtual ~GPUBlockDataHandling() = default;
+
+   void addInitializationFunction( const InitializationFunction_T & initFunction ) { initFunction_ = initFunction; }
+
+   Field_T * initialize( IBlock * const block )
+   {
+      Field_T * field = allocate( block );
+      return field;
+   }
+
+   inline void serialize( ) {};
+
+   void serializeCoarseToFine( ) {};
+   void serializeFineToCoarse( ) {};
+
+   Field_T * deserialize( IBlock * const block ) { return reallocate( block ); }
+
+   Field_T * deserializeCoarseToFine( Block * const block ) { return reallocate( block ); }
+   Field_T * deserializeFineToCoarse( Block * const block ) { return reallocate( block ); }   
+   
+   void deserialize( ) {};
+
+   void deserializeCoarseToFine( ) {};
+   void deserializeFineToCoarse( ) {};
+
+protected:
+
+   /// must be thread-safe !
+   virtual Field_T *   allocate( IBlock * const block ) = 0; // used in 'initialize'
+   /// must be thread-safe !
+   virtual Field_T * reallocate( IBlock * const block ) = 0; // used in all deserialize member functions
+
+   template< typename T > struct Merge
+   { static T result( const T & value ) { return Pseudo2D ? static_cast<T>( value / numeric_cast<T>(4) ) : static_cast<T>( value / numeric_cast<T>(8) ); } };
+
+   template< typename T > struct Merge< Vector2<T> >
+   { static Vector2<T> result( const Vector2<T> & value ) { return Pseudo2D ? (value / numeric_cast<T>(4)) : (value / numeric_cast<T>(8)); } };
+
+   template< typename T > struct Merge< Vector3<T> >
+   { static Vector3<T> result( const Vector3<T> & value ) { return Pseudo2D ? (value / numeric_cast<T>(4)) : (value / numeric_cast<T>(8)); } };
+
+   void sizeCheck( const uint_t xSize, const uint_t ySize, const uint_t zSize )
+   {
+      WALBERLA_CHECK( (xSize & uint_t(1)) == uint_t(0), "The x-size of your field must be divisible by 2." );
+      WALBERLA_CHECK( (ySize & uint_t(1)) == uint_t(0), "The y-size of your field must be divisible by 2." );
+      if( Pseudo2D )
+      { WALBERLA_CHECK( zSize == uint_t(1), "The z-size of your field must be equal to 1 (pseudo 2D mode)." ); }
+      else
+      { WALBERLA_CHECK( (zSize & uint_t(1)) == uint_t(0), "The z-size of your field must be divisible by 2." ); }
+   }
+   
+   InitializationFunction_T initFunction_;
+
+}; // class GPUBlockDataHandling
+
+} // namespace field
+} // namespace walberla
diff --git a/src/cuda/GPUField.h b/src/cuda/GPUField.h
index 75766957a..3d56fbaa6 100755
--- a/src/cuda/GPUField.h
+++ b/src/cuda/GPUField.h
@@ -65,7 +65,7 @@ namespace cuda {
       GPUField( uint_t _xSize, uint_t _ySize, uint_t _zSize, uint_t _fSize,
                 uint_t _nrOfGhostLayers, const Layout & _layout = zyxf, bool usePitchedMem = true );
 
-      ~GPUField();
+      virtual ~GPUField();
 
       Layout layout() const { return layout_; }
 
@@ -110,7 +110,7 @@ namespace cuda {
       bool hasSameAllocSize( const GPUField<T> & other ) const;
       bool hasSameSize( const GPUField<T> & other ) const;
 
-      GPUField<T> * cloneUninitialized() const;
+      virtual GPUField<T> * cloneUninitialized() const;
 
       void swapDataPointers( GPUField<T> & other );
       void swapDataPointers( GPUField<T> * other ) { swapDataPointers( *other ); }
diff --git a/src/cuda/GPUPdfField.h b/src/cuda/GPUPdfField.h
new file mode 100644
index 000000000..1212e27d7
--- /dev/null
+++ b/src/cuda/GPUPdfField.h
@@ -0,0 +1,79 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file GPUPdfField.h
+//! \ingroup cuda
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "GPUField.h"
+
+namespace walberla
+{
+namespace cuda
+{
+template< typename LatticeModel_T >
+class GPUPdfField : public GPUField< real_t >
+{
+ public:
+   //** Type Definitions  **********************************************************************************************
+   /*! \name Type Definitions */
+   //@{
+   typedef LatticeModel_T LatticeModel;
+   typedef typename LatticeModel_T::Stencil Stencil;
+   //@}
+   //*******************************************************************************************************************
+   GPUPdfField(uint_t _xSize, uint_t _ySize, uint_t _zSize, uint_t _fSize, const LatticeModel_T& _latticeModel,
+               uint_t _nrOfGhostLayers, const Layout& _layout = zyxf, bool usePitchedMem = true);
+
+   virtual ~GPUPdfField() = default;
+
+   const LatticeModel_T& latticeModel() const { return latticeModel_; }
+   LatticeModel_T& latticeModel() { return latticeModel_; }
+
+   void resetLatticeModel(const LatticeModel_T& lm) { latticeModel_ = lm; }
+
+   GPUPdfField<LatticeModel_T> * cloneUninitialized() const;
+
+ protected:
+   LatticeModel_T latticeModel_;
+};
+
+template< typename LatticeModel_T >
+GPUPdfField< LatticeModel_T >::GPUPdfField(uint_t _xSize, uint_t _ySize, uint_t _zSize, uint_t _fSize,
+                                           const LatticeModel_T& _latticeModel, uint_t _nrOfGhostLayers,
+                                           const Layout& _layout, bool usePitchedMem):
+     GPUField(_xSize, _ySize, _zSize, _fSize, _nrOfGhostLayers, _layout, usePitchedMem), latticeModel_(_latticeModel)
+{
+
+}
+
+template< typename LatticeModel_T >
+GPUPdfField<LatticeModel_T> * GPUPdfField<LatticeModel_T>::cloneUninitialized() const
+{
+   GPUPdfField<LatticeModel_T> * res = new GPUPdfField<LatticeModel_T>( xSize(), ySize(), zSize(), fSize(), latticeModel_,
+                                               nrOfGhostLayers(), layout(), isPitchedMem() );
+
+   WALBERLA_ASSERT( hasSameAllocSize( *res ) );
+   WALBERLA_ASSERT( hasSameSize( *res ) );
+   WALBERLA_ASSERT( layout() == res->layout() );
+   WALBERLA_ASSERT( isPitchedMem() == res->isPitchedMem() );
+   return res;
+}
+} // namespace cuda
+} // namespace walberla
\ No newline at end of file
diff --git a/src/field/GhostLayerField.h b/src/field/GhostLayerField.h
index e6c8b454c..00d3b58d1 100644
--- a/src/field/GhostLayerField.h
+++ b/src/field/GhostLayerField.h
@@ -90,7 +90,7 @@ namespace field {
                        const std::vector<T> & fValues, const Layout & layout = zyxf,
                        const shared_ptr<FieldAllocator<T> > &alloc = shared_ptr<FieldAllocator<T> >() );
 
-      virtual ~GhostLayerField() {}
+      virtual ~GhostLayerField() = default;
 
 
 
diff --git a/src/field/blockforest/BlockDataHandling.h b/src/field/blockforest/BlockDataHandling.h
index dc6328a66..98252f6c0 100644
--- a/src/field/blockforest/BlockDataHandling.h
+++ b/src/field/blockforest/BlockDataHandling.h
@@ -45,7 +45,7 @@ public:
    typedef typename Field_T::value_type Value_T;
    typedef std::function< void ( Field_T * field, IBlock * const block ) > InitializationFunction_T;
 
-   virtual ~BlockDataHandling() {}
+   virtual ~BlockDataHandling() = default;
 
    void addInitializationFunction( const InitializationFunction_T & initFunction ) { initFunction_ = initFunction; }
 
diff --git a/src/lbm/field/PdfField.h b/src/lbm/field/PdfField.h
index 5b4429877..4fd11e3b9 100644
--- a/src/lbm/field/PdfField.h
+++ b/src/lbm/field/PdfField.h
@@ -104,7 +104,7 @@ public:
              const uint_t ghostLayers = uint_t(1), const field::Layout & _layout = field::zyxf,
              const shared_ptr< field::FieldAllocator<real_t> > & alloc = shared_ptr< field::FieldAllocator<real_t> >() );
 
-   virtual ~PdfField() {}
+   virtual ~PdfField() = default;
 
 
 
diff --git a/tests/cuda/CMakeLists.txt b/tests/cuda/CMakeLists.txt
index 4c675bc9c..e6af59ed1 100644
--- a/tests/cuda/CMakeLists.txt
+++ b/tests/cuda/CMakeLists.txt
@@ -27,8 +27,10 @@ if( WALBERLA_BUILD_WITH_CODEGEN )
     waLBerla_generate_target_from_python(NAME LbCodeGenerationExampleGeneratedGPU
             FILE codegen/LbCodeGenerationExampleGPU.py
             OUT_FILES LbCodeGenerationExample_LatticeModel.cu LbCodeGenerationExample_LatticeModel.h
+            PDF_Setter.cu PDF_Setter.h
             LbCodeGenerationExample_NoSlip.cu LbCodeGenerationExample_NoSlip.h
-            LbCodeGenerationExample_UBB.cu LbCodeGenerationExample_UBB.h )
+            LbCodeGenerationExample_UBB.cu LbCodeGenerationExample_UBB.h
+            PackInfo.cu PackInfo.h)
     waLBerla_compile_test( FILES codegen/LbCodeGenerationExampleGPU.cpp DEPENDS LbCodeGenerationExampleGeneratedGPU field blockforest geometry timeloop vtk cuda)
 
     waLBerla_generate_target_from_python(NAME CodegenJacobiGPUGeneratedCudaJacobiKernel FILE codegen/CudaJacobiKernel.py
diff --git a/tests/cuda/codegen/LbCodeGenerationExampleGPU.cpp b/tests/cuda/codegen/LbCodeGenerationExampleGPU.cpp
index 6329445a0..2ccc59e9d 100644
--- a/tests/cuda/codegen/LbCodeGenerationExampleGPU.cpp
+++ b/tests/cuda/codegen/LbCodeGenerationExampleGPU.cpp
@@ -13,8 +13,8 @@
 //  You should have received a copy of the GNU General Public License along
 //  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
-//! \file SrtWithForceField.cpp
-//! \author Martin Bauer <martin.bauer@fau.de>
+//! \file LBCodeGenerationExampleGPU.cpp
+//! \author Markus Holzer <markus.holzer@fau.de>
 //
 //======================================================================================================================
 
@@ -25,6 +25,11 @@
 #include "geometry/all.h"
 #include "timeloop/all.h"
 
+#include "cuda/AddGPUFieldToStorage.h"
+#include "cuda/DeviceSelectMPI.h"
+#include "cuda/communication/UniformGPUScheme.h"
+
+
 #include "lbm/field/PdfField.h"
 #include "lbm/field/AddToStorage.h"
 #include "lbm/communication/PdfFieldPackInfo.h"
@@ -33,28 +38,31 @@
 
 #include "LbCodeGenerationExample_UBB.h"
 #include "LbCodeGenerationExample_NoSlip.h"
+#include "PackInfo.h"
+#include "PDF_Setter.h"
 #include "LbCodeGenerationExample_LatticeModel.h"
 
-
 using namespace walberla;
 
 typedef lbm::LbCodeGenerationExample_LatticeModel LatticeModel_T;
 typedef LatticeModel_T::Stencil                   Stencil_T;
 typedef LatticeModel_T::CommunicationStencil      CommunicationStencil_T;
 typedef lbm::PdfField< LatticeModel_T >           PdfField_T;
+typedef pystencils::PackInfo PackInfo_T;
 
 typedef GhostLayerField< real_t, LatticeModel_T::Stencil::D > VectorField_T;
+typedef GhostLayerField< real_t, 19 > Test_T;
 typedef GhostLayerField< real_t, 1 > ScalarField_T;
 
 typedef walberla::uint8_t    flag_t;
 typedef FlagField< flag_t >  FlagField_T;
 
-
+typedef cuda::GPUField<double> GPUField;
 
 int main( int argc, char ** argv )
 {
    walberla::Environment walberlaEnv( argc, argv );
-
+   cuda::selectDeviceBasedOnMpiRank();
    auto blocks = blockforest::createUniformBlockGridFromConfig( walberlaEnv.config() );
 
    // read parameters
@@ -67,12 +75,26 @@ int main( int argc, char ** argv )
    const double remainingTimeLoggerFrequency = parameters.getParameter< double >( "remainingTimeLoggerFrequency", 3.0 ); // in seconds
 
    // create fields
-   BlockDataID forceFieldId = field::addToStorage<VectorField_T>( blocks, "Force", real_t( 0.0 ));
    BlockDataID velFieldId = field::addToStorage<VectorField_T>( blocks, "Velocity", real_t( 0.0 ));
-   BlockDataID omegaFieldId = field::addToStorage<ScalarField_T>( blocks, "Omega", real_t( 0.0 ));
+   BlockDataID velFieldIdGPU = cuda::addGPUFieldToStorage<VectorField_T >( blocks, velFieldId, "Velocity field on GPU", true );
+
+   // the generated latticeModel is for GPU usage only. Thus all input data has to be allocated on GPU
+   LatticeModel_T latticeModel = LatticeModel_T( velFieldIdGPU, omega );
+
+   // LatticeModel_T latticeModel = LatticeModel_T( velFieldIdGPU, omega );
+   // BlockDataID pdfFieldId = field::addToStorage<Test_T>(blocks, "pdf field", real_t(0), field::fzyx);
+   BlockDataID pdfFieldId = lbm::addPdfFieldToStorage( blocks, "pdf field", latticeModel, initialVelocity, real_t(1), field::fzyx );
+   BlockDataID pdfFieldIdGPU = cuda::addGPUPdfFieldToStorage<PdfField_T, LatticeModel_T>( blocks, pdfFieldId, latticeModel, true, "PDF field on GPU" );
+
+
+   pystencils::PDF_Setter pdf_setter(pdfFieldIdGPU);
+
+   WALBERLA_LOG_INFO_ON_ROOT("initialization of the distributions")
+   for (auto &block : *blocks)
+   {
+      pdf_setter(&block);
+   }
 
-   LatticeModel_T latticeModel = LatticeModel_T( forceFieldId, omegaFieldId, velFieldId, omega );
-   BlockDataID pdfFieldId = lbm::addPdfFieldToStorage( blocks, "pdf field", latticeModel, initialVelocity, real_t(1) );
    BlockDataID flagFieldId = field::addFlagFieldToStorage< FlagField_T >( blocks, "flag field" );
 
    // create and initialize boundary handling
@@ -81,8 +103,8 @@ int main( int argc, char ** argv )
 
    auto boundariesConfig = walberlaEnv.config()->getOneBlock( "Boundaries" );
 
-   lbm::LbCodeGenerationExample_UBB ubb(blocks, pdfFieldId);
-   lbm::LbCodeGenerationExample_NoSlip noSlip(blocks, pdfFieldId);
+   lbm::LbCodeGenerationExample_UBB ubb(blocks, pdfFieldIdGPU);
+   lbm::LbCodeGenerationExample_NoSlip noSlip(blocks, pdfFieldIdGPU);
 
    geometry::initBoundaryHandling<FlagField_T>(*blocks, flagFieldId, boundariesConfig);
    geometry::setNonBoundaryCellsToDomain<FlagField_T>(*blocks, flagFieldId, fluidFlagUID);
@@ -94,31 +116,34 @@ int main( int argc, char ** argv )
    SweepTimeloop timeloop( blocks->getBlockStorage(), timesteps );
 
    // create communication for PdfField
-   blockforest::communication::UniformBufferedScheme< CommunicationStencil_T > communication( blocks );
-   communication.addPackInfo( make_shared< lbm::PdfFieldPackInfo< LatticeModel_T > >( pdfFieldId ) );
+   cuda::communication::UniformGPUScheme< Stencil_T > com(blocks, 0);
+   com.addPackInfo(make_shared< PackInfo_T >(pdfFieldIdGPU));
+   auto communication = std::function< void() >([&]() { com.communicate(nullptr); });
 
    // add LBM sweep and communication to time loop
    timeloop.add() << BeforeFunction( communication, "communication" )
                   << Sweep( noSlip, "noSlip boundary" );
    timeloop.add() << Sweep( ubb, "ubb boundary" );
-   timeloop.add() << Sweep( LatticeModel_T::Sweep( pdfFieldId ), "LB stream & collide" );
-
-   // LBM stability check
-   timeloop.addFuncAfterTimeStep( makeSharedFunctor( field::makeStabilityChecker< PdfField_T, FlagField_T >( walberlaEnv.config(), blocks, pdfFieldId,
-                                                                                                             flagFieldId, fluidFlagUID ) ),
-                                  "LBM stability check" );
+   timeloop.add() << Sweep( LatticeModel_T::Sweep( pdfFieldIdGPU ), "LB stream & collide" );
 
    // log remaining time
    timeloop.addFuncAfterTimeStep( timing::RemainingTimeLogger( timeloop.getNrOfTimeSteps(), remainingTimeLoggerFrequency ), "remaining time logger" );
 
    // add VTK output to time loop
-   lbm::VTKOutput< LatticeModel_T, FlagField_T >::addToTimeloop( timeloop, blocks, walberlaEnv.config(), pdfFieldId, flagFieldId, fluidFlagUID );
-
-   // create adaptors, so that the GUI also displays density and velocity
-   // adaptors are like fields with the difference that they do not store values
-   // but calculate the values based on other fields ( here the PdfField )
-   field::addFieldAdaptor<lbm::Adaptor<LatticeModel_T>::Density>       ( blocks, pdfFieldId, "DensityAdaptor" );
-   field::addFieldAdaptor<lbm::Adaptor<LatticeModel_T>::VelocityVector>( blocks, pdfFieldId, "VelocityAdaptor" );
+   uint_t vtkWriteFrequency = parameters.getParameter<uint_t>("VTKwriteFrequency", 0);
+   if (vtkWriteFrequency > 0)
+   {
+      auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out",
+                                                      "simulation_step", false, true, true, false, 0);
+      vtkOutput->addBeforeFunction( [&]() {
+        cuda::fieldCpy<VectorField_T, GPUField>( blocks, velFieldId, velFieldIdGPU );
+      });
+      auto Writer = make_shared<field::VTKWriter<VectorField_T>>(velFieldId, "Velocity");
+      vtkOutput->addCellDataWriter(Writer);
+
+      timeloop.addFuncBeforeTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
+   }
+   // lbm::VTKOutput< LatticeModel_T, FlagField_T >::addToTimeloop( timeloop, blocks, walberlaEnv.config(), pdfFieldId, flagFieldId, fluidFlagUID );
 
    timeloop.run();
 
diff --git a/tests/cuda/codegen/LbCodeGenerationExampleGPU.prm b/tests/cuda/codegen/LbCodeGenerationExampleGPU.prm
index f233188ac..e79c716eb 100644
--- a/tests/cuda/codegen/LbCodeGenerationExampleGPU.prm
+++ b/tests/cuda/codegen/LbCodeGenerationExampleGPU.prm
@@ -4,7 +4,7 @@ Parameters
 	omega           1.8;
 	timesteps       500;
 
-	useGui 0;
+	VTKwriteFrequency 100;
 	remainingTimeLoggerFrequency 3; // in seconds
 }
 
diff --git a/tests/cuda/codegen/LbCodeGenerationExampleGPU.py b/tests/cuda/codegen/LbCodeGenerationExampleGPU.py
index 3c01f0ae0..fb699422a 100644
--- a/tests/cuda/codegen/LbCodeGenerationExampleGPU.py
+++ b/tests/cuda/codegen/LbCodeGenerationExampleGPU.py
@@ -1,23 +1,42 @@
 import sympy as sp
 import pystencils as ps
-from lbmpy.creationfunctions import create_lb_collision_rule
+from lbmpy.stencils import get_stencil
+from lbmpy.creationfunctions import create_lb_method, create_lb_collision_rule, create_lb_update_rule
 from lbmpy.boundaries import NoSlip, UBB
-from pystencils_walberla import CodeGeneration
+from lbmpy.macroscopic_value_kernels import macroscopic_values_setter
+from pystencils_walberla import CodeGeneration, generate_sweep, generate_pack_info_from_kernel
 from lbmpy_walberla import RefinementScaling, generate_boundary, generate_lattice_model
 
 with CodeGeneration() as ctx:
-    omega, omega_free = sp.symbols("omega, omega_free")
-    force_field, vel_field, omega_out = ps.fields("force(3), velocity(3), omega_out: [3D]", layout='zyxf')
+    stencil = get_stencil("D3Q19")
 
-    # the collision rule of the LB method where the some advanced features
+    dim = len(stencil[0])
+    q = len(stencil)
+
+    omega = sp.symbols("omega")
+    omega_free = 1
+    vel_field = ps.fields(f"velocity({dim}): [3D]", layout='zyxf')
+    pdfs = ps.fields(f"pdfs({q}): [3D]", layout='zyxf')
+
+    lb_method = create_lb_method(stencil=stencil, compressible=True, method='mrt',
+                                 relaxation_rates=[omega, omega, omega_free, omega_free, omega_free, omega_free])
+
+    # the collision rule of the LB method
     collision_rule = create_lb_collision_rule(
-        stencil='D3Q19', compressible=True,
-        method='mrt', relaxation_rates=[omega, omega, omega_free, omega_free, omega_free, omega_free],
+        lb_method=lb_method,
+        output={'velocity': vel_field},
         optimization={'cse_global': True}
     )
 
+    update_rule = create_lb_update_rule(collision_rule=collision_rule)
+
+    setter_assignments = macroscopic_values_setter(lb_method, velocity=[0]*dim, pdfs=pdfs.center_vector, density=1)
+
     # generate lattice model and (optionally) boundary conditions
     # for CPU simulations waLBerla's internal boundary handling can be used as well
     generate_lattice_model(ctx, 'LbCodeGenerationExample_LatticeModel', collision_rule, target='gpu')
+    generate_sweep(ctx, 'PDF_Setter', setter_assignments, target='gpu')
     generate_boundary(ctx, 'LbCodeGenerationExample_UBB', UBB([0.05, 0, 0]), collision_rule.method, target='gpu')
     generate_boundary(ctx, 'LbCodeGenerationExample_NoSlip', NoSlip(), collision_rule.method, target='gpu')
+
+    generate_pack_info_from_kernel(ctx, "PackInfo", update_rule, target='gpu')
diff --git a/tests/lbm/codegen/LbCodeGenerationExample.cpp b/tests/lbm/codegen/LbCodeGenerationExample.cpp
index 665292f1b..8ff9abd38 100644
--- a/tests/lbm/codegen/LbCodeGenerationExample.cpp
+++ b/tests/lbm/codegen/LbCodeGenerationExample.cpp
@@ -54,27 +54,29 @@ typedef FlagField< flag_t >  FlagField_T;
 
 int main( int argc, char ** argv )
 {
-   walberla::Environment walberlaEnv( argc, argv );
+   walberla::Environment walberlaEnv(argc, argv);
 
-   auto blocks = blockforest::createUniformBlockGridFromConfig( walberlaEnv.config() );
+   auto blocks = blockforest::createUniformBlockGridFromConfig(walberlaEnv.config());
 
    // read parameters
-   auto parameters = walberlaEnv.config()->getOneBlock( "Parameters" );
+   auto parameters = walberlaEnv.config()->getOneBlock("Parameters");
 
-   const real_t          omega           = parameters.getParameter< real_t >         ( "omega",           real_c( 1.4 ) );
-   const Vector3<real_t> initialVelocity = parameters.getParameter< Vector3<real_t> >( "initialVelocity", Vector3<real_t>() );
-   const uint_t          timesteps       = parameters.getParameter< uint_t >         ( "timesteps",       uint_c( 10 )  );
+   const real_t omega = parameters.getParameter< real_t >("omega", real_c(1.4));
+   const Vector3< real_t > initialVelocity =
+      parameters.getParameter< Vector3< real_t > >("initialVelocity", Vector3< real_t >());
+   const uint_t timesteps = parameters.getParameter< uint_t >("timesteps", uint_c(10));
 
-   const double remainingTimeLoggerFrequency = parameters.getParameter< double >( "remainingTimeLoggerFrequency", 3.0 ); // in seconds
+   const double remainingTimeLoggerFrequency =
+      parameters.getParameter< double >("remainingTimeLoggerFrequency", 3.0); // in seconds
 
    // create fields
-   BlockDataID forceFieldId = field::addToStorage<VectorField_T>( blocks, "Force", real_t( 0.0 ));
-   BlockDataID velFieldId = field::addToStorage<VectorField_T>( blocks, "Velocity", real_t( 0.0 ));
-   BlockDataID omegaFieldId = field::addToStorage<ScalarField_T>( blocks, "Omega", real_t( 0.0 ));
+   BlockDataID forceFieldId = field::addToStorage< VectorField_T >(blocks, "Force", real_t(0.0));
+   BlockDataID velFieldId   = field::addToStorage< VectorField_T >(blocks, "Velocity", real_t(0.0));
+   BlockDataID omegaFieldId = field::addToStorage< ScalarField_T >(blocks, "Omega", real_t(0.0));
 
-   LatticeModel_T latticeModel = LatticeModel_T( forceFieldId, omegaFieldId, velFieldId, omega );
-   BlockDataID pdfFieldId = lbm::addPdfFieldToStorage( blocks, "pdf field", latticeModel, initialVelocity, real_t(1) );
-   BlockDataID flagFieldId = field::addFlagFieldToStorage< FlagField_T >( blocks, "flag field" );
+   LatticeModel_T latticeModel = LatticeModel_T(forceFieldId, omegaFieldId, velFieldId, omega);
+   BlockDataID pdfFieldId  = lbm::addPdfFieldToStorage(blocks, "pdf field", latticeModel, initialVelocity, real_t(1));
+   BlockDataID flagFieldId = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field");
 
    // create and initialize boundary handling
    const FlagUID fluidFlagUID( "Fluid" );
-- 
GitLab