From 7bd039fa50c1deeb957dee4d6fa596b6372d7f95 Mon Sep 17 00:00:00 2001 From: markus holzer <markus.holzer@fau.de> Date: Sun, 8 Nov 2020 23:20:42 +0100 Subject: [PATCH] Latticemodel for GPU test case working --- .../templates/LatticeModel.tmpl.cpp | 22 +- .../templates/LatticeModel.tmpl.h | 12 -- src/cuda/AddGPUFieldToStorage.h | 17 ++ src/cuda/AddGPUFieldToStorage.impl.h | 204 +++++++++++++----- src/cuda/CMakeLists.txt | 2 +- src/cuda/GPUBlockDataHandling.h | 104 +++++++++ src/cuda/GPUField.h | 4 +- src/cuda/GPUPdfField.h | 79 +++++++ src/field/GhostLayerField.h | 2 +- src/field/blockforest/BlockDataHandling.h | 2 +- src/lbm/field/PdfField.h | 2 +- tests/cuda/CMakeLists.txt | 4 +- .../codegen/LbCodeGenerationExampleGPU.cpp | 77 ++++--- .../codegen/LbCodeGenerationExampleGPU.prm | 2 +- .../codegen/LbCodeGenerationExampleGPU.py | 33 ++- tests/lbm/codegen/LbCodeGenerationExample.cpp | 28 +-- 16 files changed, 467 insertions(+), 127 deletions(-) create mode 100644 src/cuda/GPUBlockDataHandling.h create mode 100644 src/cuda/GPUPdfField.h diff --git a/python/lbmpy_walberla/templates/LatticeModel.tmpl.cpp b/python/lbmpy_walberla/templates/LatticeModel.tmpl.cpp index 39360a408..a7c94a37f 100644 --- a/python/lbmpy_walberla/templates/LatticeModel.tmpl.cpp +++ b/python/lbmpy_walberla/templates/LatticeModel.tmpl.cpp @@ -20,7 +20,7 @@ #include "core/DataTypes.h" #include "core/Macros.h" -#include "lbm/field/PdfField.h" +{%if target is equalto 'gpu'%}#include "cuda/GPUPdfField.h"{% else %}#include "lbm/field/PdfField.h"{% endif %} #include "lbm/sweeps/Streaming.h" #include "{{class_name}}.h" @@ -65,13 +65,16 @@ void {{class_name}}::Sweep::streamCollide( IBlock * block, const uint_t numberOf { {{stream_collide_kernel|generate_block_data_to_field_extraction(parameters=['pdfs', 'pdfs_tmp'])|indent(4)}} - auto & lm = pdfs->latticeModel(); - //auto & lm = dynamic_cast< lbm::PdfField<{{class_name}}> * > (pdfs)->latticeModel(); + {%if target is equalto 'gpu'%} + auto & lm = static_cast< cuda::GPUPdfField<{{class_name}}> * > (pdfs)->latticeModel(); + {% else %} + auto & lm = dynamic_cast< lbm::PdfField<{{class_name}}> * > (pdfs)->latticeModel(); + {% endif %} WALBERLA_ASSERT_EQUAL( *(lm.blockId_), block->getId() ); {{stream_collide_kernel|generate_refs_for_kernel_parameters(prefix='lm.', parameters_to_ignore=['pdfs', 'pdfs_tmp'])|indent(4) }} {%if target is equalto 'gpu'%} - {{stream_collide_kernel|generate_call('cell_idx_c(numberOfGhostLayersToInclude)', stream='stream')|indent(4)}} + {{stream_collide_kernel|generate_call(stream='stream')|indent(4)}} {% else %} {{stream_collide_kernel|generate_call('cell_idx_c(numberOfGhostLayersToInclude)')|indent(4)}} {% endif %} @@ -83,13 +86,16 @@ void {{class_name}}::Sweep::collide( IBlock * block, const uint_t numberOfGhostL { {{collide_kernel|generate_block_data_to_field_extraction(parameters=['pdfs'])|indent(4)}} - auto & lm = pdfs->latticeModel(); - //auto & lm = dynamic_cast< lbm::PdfField<{{class_name}}> * > (pdfs)->latticeModel(); + {%if target is equalto 'gpu'%} + auto & lm = static_cast< cuda::GPUPdfField<{{class_name}}> * > (pdfs)->latticeModel(); + {% else %} + auto & lm = dynamic_cast< lbm::PdfField<{{class_name}}> * > (pdfs)->latticeModel(); + {% endif %} WALBERLA_ASSERT_EQUAL( *(lm.blockId_), block->getId() ); {{collide_kernel|generate_refs_for_kernel_parameters(prefix='lm.', parameters_to_ignore=['pdfs', 'pdfs_tmp'])|indent(4) }} {%if target is equalto 'gpu'%} - {{collide_kernel|generate_call('cell_idx_c(numberOfGhostLayersToInclude)', stream='stream')|indent(4)}} + {{collide_kernel|generate_call(stream='stream')|indent(4)}} {% else %} {{collide_kernel|generate_call('cell_idx_c(numberOfGhostLayersToInclude)')|indent(4)}} {% endif %} @@ -101,7 +107,7 @@ void {{class_name}}::Sweep::stream( IBlock * block, const uint_t numberOfGhostLa {{stream_kernel|generate_block_data_to_field_extraction(parameters=['pdfs', 'pdfs_tmp'])|indent(4)}} {%if target is equalto 'gpu'%} - {{stream_kernel|generate_call('cell_idx_c(numberOfGhostLayersToInclude)', stream='stream')|indent(4)}} + {{stream_kernel|generate_call(stream='stream')|indent(4)}} {% else %} {{stream_kernel|generate_call('cell_idx_c(numberOfGhostLayersToInclude)')|indent(4)}} {% endif %} diff --git a/python/lbmpy_walberla/templates/LatticeModel.tmpl.h b/python/lbmpy_walberla/templates/LatticeModel.tmpl.h index 1b1aacc33..be81e2696 100644 --- a/python/lbmpy_walberla/templates/LatticeModel.tmpl.h +++ b/python/lbmpy_walberla/templates/LatticeModel.tmpl.h @@ -345,9 +345,6 @@ struct Equilibrium< {{class_name}}, void > static void set( FieldPtrOrIterator & it, const Vector3< real_t > & u = Vector3< real_t >( real_t(0.0) ), real_t rho = real_t(1.0) ) { - {%if target is equalto 'gpu'%} - WALBERLA_ABORT("Equilibrium setter Not implemented for target GPU"); - {% else %} {%if not compressible %} rho -= real_t(1.0); {%endif %} @@ -355,16 +352,12 @@ struct Equilibrium< {{class_name}}, void > {% for eqTerm in equilibrium -%} it[{{loop.index0 }}] = {{eqTerm}}; {% endfor -%} - {% endif %} } template< typename PdfField_T > static void set( PdfField_T & pdf, const cell_idx_t x, const cell_idx_t y, const cell_idx_t z, const Vector3< real_t > & u = Vector3< real_t >( real_t(0.0) ), real_t rho = real_t(1.0) ) { - {%if target is equalto 'gpu'%} - WALBERLA_ABORT("Equilibrium setter Not implemented for target GPU"); - {% else %} {%if not compressible %} rho -= real_t(1.0); {%endif %} @@ -373,7 +366,6 @@ struct Equilibrium< {{class_name}}, void > {% for eqTerm in equilibrium -%} pdf.getF( &xyz0, {{loop.index0 }})= {{eqTerm}}; {% endfor -%} - {% endif %} } }; @@ -463,9 +455,6 @@ struct DensityAndVelocityRange<{{class_name}}, FieldIteratorXYZ> static void set( FieldIteratorXYZ & begin, const FieldIteratorXYZ & end, const {{class_name}} & lm, const Vector3< real_t > & u = Vector3< real_t >( real_t(0.0) ), const real_t rho_in = real_t(1.0) ) { - {%if target is equalto 'gpu'%} - WALBERLA_ABORT("DensityAndVelocityRange Not implemented for target GPU"); - {% else %} for( auto cellIt = begin; cellIt != end; ++cellIt ) { const auto x = cellIt.x(); @@ -478,7 +467,6 @@ struct DensityAndVelocityRange<{{class_name}}, FieldIteratorXYZ> Equilibrium<{{class_name}}>::set(cellIt, Vector3<real_t>(u_0, u_1, u_2), rho{%if not compressible %} + real_t(1) {%endif%}); } - {% endif %} } }; diff --git a/src/cuda/AddGPUFieldToStorage.h b/src/cuda/AddGPUFieldToStorage.h index 3803ff8c8..65734439a 100644 --- a/src/cuda/AddGPUFieldToStorage.h +++ b/src/cuda/AddGPUFieldToStorage.h @@ -65,6 +65,23 @@ namespace cuda { + //******************************************************************************************************************* + /*! Adds a cuda::GPUPdfField to a StructuredBlockStorage using data from a CPU PDF field + * + * - adds a GPU PDF field to a StructuredBlockStorage using a CPU PDF field + * - acts similar to addGPUFieldToStorage but needs a latticeModel for GPU usage + * @tparam Field_T type of the CPU field, the created GPUField will be of type cuda::GPUField<Field_T::value_type> + */ + //******************************************************************************************************************* + template< typename Field_T, typename LatticeModel_T> + BlockDataID addGPUPdfFieldToStorage( const shared_ptr< StructuredBlockStorage > & bs, + ConstBlockDataID cpuFieldID, + const LatticeModel_T & _latticeModel, + const std::string & identifier, + bool usePitchedMem = true ); + + + } // namespace cuda } // namespace walberla diff --git a/src/cuda/AddGPUFieldToStorage.impl.h b/src/cuda/AddGPUFieldToStorage.impl.h index 1befc3e81..1dff45b1c 100644 --- a/src/cuda/AddGPUFieldToStorage.impl.h +++ b/src/cuda/AddGPUFieldToStorage.impl.h @@ -22,75 +22,173 @@ #pragma once #include "cuda/FieldCopy.h" +#include "cuda/GPUPdfField.h" +#include "cuda/GPUBlockDataHandling.h" +#include "lbm/field/PdfField.h" + +namespace walberla +{ +namespace cuda +{ +namespace internal +{ +template< typename GPUField_T > +GPUField_T* createGPUField(const IBlock* const block, const StructuredBlockStorage* const bs, uint_t ghostLayers, + uint_t fSize, const field::Layout& layout, bool usePitchedMem) +{ + return new GPUField_T(bs->getNumberOfXCells(*block), bs->getNumberOfYCells(*block), bs->getNumberOfZCells(*block), + fSize, ghostLayers, layout, usePitchedMem); +} + +template< typename Field_T > +GPUField< typename Field_T::value_type >* createGPUFieldFromCPUField(const IBlock* const block, + const StructuredBlockStorage* const, + ConstBlockDataID cpuFieldID, bool usePitchedMem) +{ + typedef GPUField< typename Field_T::value_type > GPUField_T; + + const Field_T* f = block->getData< Field_T >(cpuFieldID); + auto gpuField = + new GPUField_T(f->xSize(), f->ySize(), f->zSize(), f->fSize(), f->nrOfGhostLayers(), f->layout(), usePitchedMem); + + cuda::fieldCpy(*gpuField, *f); + + return gpuField; +} + +template<typename PdfField_T, typename LatticeModel_T > +class GPUPdfFieldHandling + : public cuda::GPUBlockDataHandling< GPUPdfField<LatticeModel_T>, LatticeModel_T::Stencil::D == 2 > +{ + public: + typedef GPUPdfField< LatticeModel_T > GPUPdfField_T; + // typedef field::BlockDataHandling< GPUPdfField_T, LatticeModel_T::Stencil::D == 2 > Base_T; + + GPUPdfFieldHandling(const weak_ptr< StructuredBlockStorage >& blocks, ConstBlockDataID cpu_fieldID, + const LatticeModel_T& latticeModel, bool usePitchedMem) + : blocks_(blocks), cpu_fieldID_(cpu_fieldID), latticeModel_(latticeModel), usePitchedMem_(usePitchedMem) + {} + + inline void serialize(IBlock* const block, const BlockDataID& id, mpi::SendBuffer& buffer) + { + packLatticeModel(block, id, buffer); + } -namespace walberla { -namespace cuda { + void serializeCoarseToFine(Block* const block, const BlockDataID& id, mpi::SendBuffer& buffer, const uint_t child) + { + packLatticeModel(block, id, buffer); + } + void serializeFineToCoarse(Block* const block, const BlockDataID& id, mpi::SendBuffer& buffer) + { + packLatticeModel(block, id, buffer); + } - namespace internal + void deserialize(IBlock* const block, const BlockDataID& id, mpi::RecvBuffer& buffer) { - template< typename GPUField_T> - GPUField_T * createGPUField( const IBlock * const block, - const StructuredBlockStorage * const bs, - uint_t ghostLayers, - uint_t fSize, - const field::Layout & layout, - bool usePitchedMem ) - { - return new GPUField_T( bs->getNumberOfXCells( *block ), - bs->getNumberOfYCells( *block ), - bs->getNumberOfZCells( *block ), - fSize, ghostLayers, layout, usePitchedMem ); - } - - template< typename Field_T> - GPUField< typename Field_T::value_type> * - createGPUFieldFromCPUField( const IBlock * const block, - const StructuredBlockStorage * const, - ConstBlockDataID cpuFieldID, - bool usePitchedMem - ) - { - typedef GPUField< typename Field_T::value_type> GPUField_T; - - const Field_T * f = block->getData<Field_T>( cpuFieldID ); - auto gpuField = new GPUField_T( f->xSize(), f->ySize(), f->zSize(), f->fSize(), - f->nrOfGhostLayers(), f->layout(), usePitchedMem ); - - cuda::fieldCpy( *gpuField, *f ); - - return gpuField; - } + unpackLatticeModel(block, id, buffer); + } + void deserializeCoarseToFine(Block* const block, const BlockDataID& id, mpi::RecvBuffer& buffer) + { + unpackLatticeModel(block, id, buffer); + } + + void deserializeFineToCoarse(Block* const block, const BlockDataID& id, mpi::RecvBuffer& buffer, const uint_t child) + { + unpackLatticeModel(block, id, buffer); } + protected: + GPUPdfField_T* allocate(IBlock* const block) + { + return allocateDispatch(block, cpu_fieldID_, usePitchedMem_); + } - template< typename GPUField_T> - BlockDataID addGPUFieldToStorage(const shared_ptr< StructuredBlockStorage >& bs, - const std::string & identifier, - uint_t fSize, - const Layout layout, - uint_t nrOfGhostLayers, - bool usePitchedMem ) + GPUPdfField_T* reallocate(IBlock* const block) { - auto func = std::bind ( internal::createGPUField<GPUField_T>, std::placeholders::_1, std::placeholders::_2, nrOfGhostLayers, fSize, layout, usePitchedMem ); - return bs->addStructuredBlockData< GPUField_T >( func, identifier ); +#ifdef NDEBUG + return allocateDispatch(block, cpu_fieldID_, usePitchedMem_); +#else + return allocateDispatch(block, cpu_fieldID_, usePitchedMem_); +#endif } + private: + void packLatticeModel(IBlock* const block, const BlockDataID& id, mpi::SendBuffer& buffer) const + { + const GPUPdfField_T * field = block->template getData< GPUPdfField_T >(id); + WALBERLA_CHECK_NOT_NULLPTR(field); + buffer << field->latticeModel(); + } - template< typename Field_T> - BlockDataID addGPUFieldToStorage( const shared_ptr< StructuredBlockStorage > & bs, - ConstBlockDataID cpuFieldID, - const std::string & identifier, - bool usePitchedMem ) + void unpackLatticeModel(IBlock* const block, const BlockDataID& id, mpi::RecvBuffer& buffer) const { - auto func = std::bind ( internal::createGPUFieldFromCPUField<Field_T>, std::placeholders::_1, std::placeholders::_2, cpuFieldID, usePitchedMem ); - return bs->addStructuredBlockData< GPUField<typename Field_T::value_type> >( func, identifier ); + GPUPdfField_T * field = block->template getData< GPUPdfField_T >(id); + WALBERLA_CHECK_NOT_NULLPTR(field); + + LatticeModel_T latticeModel = field->latticeModel(); + buffer >> latticeModel; + + auto blocks = blocks_.lock(); + WALBERLA_CHECK_NOT_NULLPTR(blocks); + + latticeModel.configure(*block, *blocks); + field->resetLatticeModel(latticeModel); } + GPUPdfField_T* allocateDispatch(IBlock* const block, ConstBlockDataID cpu_fieldID, bool usePitchedMem) + { + WALBERLA_ASSERT_NOT_NULLPTR( block ); + auto blocks = blocks_.lock(); + WALBERLA_CHECK_NOT_NULLPTR( blocks ); + const PdfField_T * f = block->template getData< PdfField_T >(cpu_fieldID); -} // namespace cuda -} // namespace walberla + LatticeModel_T latticeModel = f->latticeModel(); + latticeModel_ = latticeModel; + + auto gpuField = new GPUPdfField_T(f->xSize(), f->ySize(), f->zSize(), f->fSize(), latticeModel, + f->nrOfGhostLayers(), f->layout(), usePitchedMem); + cuda::fieldCpy(*gpuField, *f); + + return gpuField; + } + weak_ptr< StructuredBlockStorage > blocks_; + + ConstBlockDataID cpu_fieldID_; + LatticeModel_T latticeModel_; + bool usePitchedMem_; + +}; // class PdfFieldHandling +} // namespace internal + +template< typename GPUField_T > +BlockDataID addGPUFieldToStorage(const shared_ptr< StructuredBlockStorage >& bs, const std::string& identifier, + uint_t fSize, const Layout layout, uint_t nrOfGhostLayers, bool usePitchedMem) +{ + auto func = std::bind(internal::createGPUField< GPUField_T >, std::placeholders::_1, std::placeholders::_2, + nrOfGhostLayers, fSize, layout, usePitchedMem); + return bs->addStructuredBlockData< GPUField_T >(func, identifier); +} + +template< typename Field_T > +BlockDataID addGPUFieldToStorage(const shared_ptr< StructuredBlockStorage >& bs, ConstBlockDataID cpuFieldID, + const std::string& identifier, bool usePitchedMem) +{ + auto func = std::bind(internal::createGPUFieldFromCPUField< Field_T >, std::placeholders::_1, std::placeholders::_2, + cpuFieldID, usePitchedMem); + return bs->addStructuredBlockData< GPUField< typename Field_T::value_type > >(func, identifier); +} + +template< typename PdfField_T, typename LatticeModel_T, typename BlockStorage_T > +BlockDataID addGPUPdfFieldToStorage(const shared_ptr< BlockStorage_T >& bs, ConstBlockDataID cpu_fieldID, + const LatticeModel_T& _latticeModel, bool usePitchedMem, const std::string& identifier) +{ + return bs->addBlockData(make_shared< internal::GPUPdfFieldHandling<PdfField_T, LatticeModel_T > >( + bs, cpu_fieldID, _latticeModel, usePitchedMem), identifier); +} +} // namespace cuda +} // namespace walberla diff --git a/src/cuda/CMakeLists.txt b/src/cuda/CMakeLists.txt index 98aa991f0..b5174a3e5 100644 --- a/src/cuda/CMakeLists.txt +++ b/src/cuda/CMakeLists.txt @@ -4,7 +4,7 @@ # ################################################################################################### -waLBerla_add_module( DEPENDS blockforest core communication domain_decomposition executiontree python_coupling field stencil +waLBerla_add_module( DEPENDS blockforest core communication domain_decomposition executiontree python_coupling field stencil lbm BUILD_ONLY_IF_FOUND CUDA ) ################################################################################################### \ No newline at end of file diff --git a/src/cuda/GPUBlockDataHandling.h b/src/cuda/GPUBlockDataHandling.h new file mode 100644 index 000000000..6b34d2434 --- /dev/null +++ b/src/cuda/GPUBlockDataHandling.h @@ -0,0 +1,104 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file GPUBlockDataHandling.h +//! \ingroup cuda +//! \author Markus Holzer <markus.holzer@fau.de> +// +//====================================================================================================================== + +#pragma once + +#include "blockforest/BlockDataHandling.h" +#include "blockforest/StructuredBlockForest.h" +#include "core/debug/CheckFunctions.h" +#include "core/math/Vector2.h" +#include "core/math/Vector3.h" +#include "field/FlagField.h" + +#include <type_traits> + + +namespace walberla { +namespace cuda { + + + +// still virtual, one must implement protected member functions 'allocate' and 'reallocate' +template< typename Field_T, bool Pseudo2D = false > +class GPUBlockDataHandling : public blockforest::BlockDataHandling< Field_T > +{ +public: + + typedef typename Field_T::value_type Value_T; + typedef std::function< void ( Field_T * field, IBlock * const block ) > InitializationFunction_T; + + virtual ~GPUBlockDataHandling() = default; + + void addInitializationFunction( const InitializationFunction_T & initFunction ) { initFunction_ = initFunction; } + + Field_T * initialize( IBlock * const block ) + { + Field_T * field = allocate( block ); + return field; + } + + inline void serialize( ) {}; + + void serializeCoarseToFine( ) {}; + void serializeFineToCoarse( ) {}; + + Field_T * deserialize( IBlock * const block ) { return reallocate( block ); } + + Field_T * deserializeCoarseToFine( Block * const block ) { return reallocate( block ); } + Field_T * deserializeFineToCoarse( Block * const block ) { return reallocate( block ); } + + void deserialize( ) {}; + + void deserializeCoarseToFine( ) {}; + void deserializeFineToCoarse( ) {}; + +protected: + + /// must be thread-safe ! + virtual Field_T * allocate( IBlock * const block ) = 0; // used in 'initialize' + /// must be thread-safe ! + virtual Field_T * reallocate( IBlock * const block ) = 0; // used in all deserialize member functions + + template< typename T > struct Merge + { static T result( const T & value ) { return Pseudo2D ? static_cast<T>( value / numeric_cast<T>(4) ) : static_cast<T>( value / numeric_cast<T>(8) ); } }; + + template< typename T > struct Merge< Vector2<T> > + { static Vector2<T> result( const Vector2<T> & value ) { return Pseudo2D ? (value / numeric_cast<T>(4)) : (value / numeric_cast<T>(8)); } }; + + template< typename T > struct Merge< Vector3<T> > + { static Vector3<T> result( const Vector3<T> & value ) { return Pseudo2D ? (value / numeric_cast<T>(4)) : (value / numeric_cast<T>(8)); } }; + + void sizeCheck( const uint_t xSize, const uint_t ySize, const uint_t zSize ) + { + WALBERLA_CHECK( (xSize & uint_t(1)) == uint_t(0), "The x-size of your field must be divisible by 2." ); + WALBERLA_CHECK( (ySize & uint_t(1)) == uint_t(0), "The y-size of your field must be divisible by 2." ); + if( Pseudo2D ) + { WALBERLA_CHECK( zSize == uint_t(1), "The z-size of your field must be equal to 1 (pseudo 2D mode)." ); } + else + { WALBERLA_CHECK( (zSize & uint_t(1)) == uint_t(0), "The z-size of your field must be divisible by 2." ); } + } + + InitializationFunction_T initFunction_; + +}; // class GPUBlockDataHandling + +} // namespace field +} // namespace walberla diff --git a/src/cuda/GPUField.h b/src/cuda/GPUField.h index 75766957a..3d56fbaa6 100755 --- a/src/cuda/GPUField.h +++ b/src/cuda/GPUField.h @@ -65,7 +65,7 @@ namespace cuda { GPUField( uint_t _xSize, uint_t _ySize, uint_t _zSize, uint_t _fSize, uint_t _nrOfGhostLayers, const Layout & _layout = zyxf, bool usePitchedMem = true ); - ~GPUField(); + virtual ~GPUField(); Layout layout() const { return layout_; } @@ -110,7 +110,7 @@ namespace cuda { bool hasSameAllocSize( const GPUField<T> & other ) const; bool hasSameSize( const GPUField<T> & other ) const; - GPUField<T> * cloneUninitialized() const; + virtual GPUField<T> * cloneUninitialized() const; void swapDataPointers( GPUField<T> & other ); void swapDataPointers( GPUField<T> * other ) { swapDataPointers( *other ); } diff --git a/src/cuda/GPUPdfField.h b/src/cuda/GPUPdfField.h new file mode 100644 index 000000000..1212e27d7 --- /dev/null +++ b/src/cuda/GPUPdfField.h @@ -0,0 +1,79 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file GPUPdfField.h +//! \ingroup cuda +//! \author Markus Holzer <markus.holzer@fau.de> +// +//====================================================================================================================== + +#pragma once + +#include "GPUField.h" + +namespace walberla +{ +namespace cuda +{ +template< typename LatticeModel_T > +class GPUPdfField : public GPUField< real_t > +{ + public: + //** Type Definitions ********************************************************************************************** + /*! \name Type Definitions */ + //@{ + typedef LatticeModel_T LatticeModel; + typedef typename LatticeModel_T::Stencil Stencil; + //@} + //******************************************************************************************************************* + GPUPdfField(uint_t _xSize, uint_t _ySize, uint_t _zSize, uint_t _fSize, const LatticeModel_T& _latticeModel, + uint_t _nrOfGhostLayers, const Layout& _layout = zyxf, bool usePitchedMem = true); + + virtual ~GPUPdfField() = default; + + const LatticeModel_T& latticeModel() const { return latticeModel_; } + LatticeModel_T& latticeModel() { return latticeModel_; } + + void resetLatticeModel(const LatticeModel_T& lm) { latticeModel_ = lm; } + + GPUPdfField<LatticeModel_T> * cloneUninitialized() const; + + protected: + LatticeModel_T latticeModel_; +}; + +template< typename LatticeModel_T > +GPUPdfField< LatticeModel_T >::GPUPdfField(uint_t _xSize, uint_t _ySize, uint_t _zSize, uint_t _fSize, + const LatticeModel_T& _latticeModel, uint_t _nrOfGhostLayers, + const Layout& _layout, bool usePitchedMem): + GPUField(_xSize, _ySize, _zSize, _fSize, _nrOfGhostLayers, _layout, usePitchedMem), latticeModel_(_latticeModel) +{ + +} + +template< typename LatticeModel_T > +GPUPdfField<LatticeModel_T> * GPUPdfField<LatticeModel_T>::cloneUninitialized() const +{ + GPUPdfField<LatticeModel_T> * res = new GPUPdfField<LatticeModel_T>( xSize(), ySize(), zSize(), fSize(), latticeModel_, + nrOfGhostLayers(), layout(), isPitchedMem() ); + + WALBERLA_ASSERT( hasSameAllocSize( *res ) ); + WALBERLA_ASSERT( hasSameSize( *res ) ); + WALBERLA_ASSERT( layout() == res->layout() ); + WALBERLA_ASSERT( isPitchedMem() == res->isPitchedMem() ); + return res; +} +} // namespace cuda +} // namespace walberla \ No newline at end of file diff --git a/src/field/GhostLayerField.h b/src/field/GhostLayerField.h index e6c8b454c..00d3b58d1 100644 --- a/src/field/GhostLayerField.h +++ b/src/field/GhostLayerField.h @@ -90,7 +90,7 @@ namespace field { const std::vector<T> & fValues, const Layout & layout = zyxf, const shared_ptr<FieldAllocator<T> > &alloc = shared_ptr<FieldAllocator<T> >() ); - virtual ~GhostLayerField() {} + virtual ~GhostLayerField() = default; diff --git a/src/field/blockforest/BlockDataHandling.h b/src/field/blockforest/BlockDataHandling.h index dc6328a66..98252f6c0 100644 --- a/src/field/blockforest/BlockDataHandling.h +++ b/src/field/blockforest/BlockDataHandling.h @@ -45,7 +45,7 @@ public: typedef typename Field_T::value_type Value_T; typedef std::function< void ( Field_T * field, IBlock * const block ) > InitializationFunction_T; - virtual ~BlockDataHandling() {} + virtual ~BlockDataHandling() = default; void addInitializationFunction( const InitializationFunction_T & initFunction ) { initFunction_ = initFunction; } diff --git a/src/lbm/field/PdfField.h b/src/lbm/field/PdfField.h index 5b4429877..4fd11e3b9 100644 --- a/src/lbm/field/PdfField.h +++ b/src/lbm/field/PdfField.h @@ -104,7 +104,7 @@ public: const uint_t ghostLayers = uint_t(1), const field::Layout & _layout = field::zyxf, const shared_ptr< field::FieldAllocator<real_t> > & alloc = shared_ptr< field::FieldAllocator<real_t> >() ); - virtual ~PdfField() {} + virtual ~PdfField() = default; diff --git a/tests/cuda/CMakeLists.txt b/tests/cuda/CMakeLists.txt index 4c675bc9c..e6af59ed1 100644 --- a/tests/cuda/CMakeLists.txt +++ b/tests/cuda/CMakeLists.txt @@ -27,8 +27,10 @@ if( WALBERLA_BUILD_WITH_CODEGEN ) waLBerla_generate_target_from_python(NAME LbCodeGenerationExampleGeneratedGPU FILE codegen/LbCodeGenerationExampleGPU.py OUT_FILES LbCodeGenerationExample_LatticeModel.cu LbCodeGenerationExample_LatticeModel.h + PDF_Setter.cu PDF_Setter.h LbCodeGenerationExample_NoSlip.cu LbCodeGenerationExample_NoSlip.h - LbCodeGenerationExample_UBB.cu LbCodeGenerationExample_UBB.h ) + LbCodeGenerationExample_UBB.cu LbCodeGenerationExample_UBB.h + PackInfo.cu PackInfo.h) waLBerla_compile_test( FILES codegen/LbCodeGenerationExampleGPU.cpp DEPENDS LbCodeGenerationExampleGeneratedGPU field blockforest geometry timeloop vtk cuda) waLBerla_generate_target_from_python(NAME CodegenJacobiGPUGeneratedCudaJacobiKernel FILE codegen/CudaJacobiKernel.py diff --git a/tests/cuda/codegen/LbCodeGenerationExampleGPU.cpp b/tests/cuda/codegen/LbCodeGenerationExampleGPU.cpp index 6329445a0..2ccc59e9d 100644 --- a/tests/cuda/codegen/LbCodeGenerationExampleGPU.cpp +++ b/tests/cuda/codegen/LbCodeGenerationExampleGPU.cpp @@ -13,8 +13,8 @@ // You should have received a copy of the GNU General Public License along // with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. // -//! \file SrtWithForceField.cpp -//! \author Martin Bauer <martin.bauer@fau.de> +//! \file LBCodeGenerationExampleGPU.cpp +//! \author Markus Holzer <markus.holzer@fau.de> // //====================================================================================================================== @@ -25,6 +25,11 @@ #include "geometry/all.h" #include "timeloop/all.h" +#include "cuda/AddGPUFieldToStorage.h" +#include "cuda/DeviceSelectMPI.h" +#include "cuda/communication/UniformGPUScheme.h" + + #include "lbm/field/PdfField.h" #include "lbm/field/AddToStorage.h" #include "lbm/communication/PdfFieldPackInfo.h" @@ -33,28 +38,31 @@ #include "LbCodeGenerationExample_UBB.h" #include "LbCodeGenerationExample_NoSlip.h" +#include "PackInfo.h" +#include "PDF_Setter.h" #include "LbCodeGenerationExample_LatticeModel.h" - using namespace walberla; typedef lbm::LbCodeGenerationExample_LatticeModel LatticeModel_T; typedef LatticeModel_T::Stencil Stencil_T; typedef LatticeModel_T::CommunicationStencil CommunicationStencil_T; typedef lbm::PdfField< LatticeModel_T > PdfField_T; +typedef pystencils::PackInfo PackInfo_T; typedef GhostLayerField< real_t, LatticeModel_T::Stencil::D > VectorField_T; +typedef GhostLayerField< real_t, 19 > Test_T; typedef GhostLayerField< real_t, 1 > ScalarField_T; typedef walberla::uint8_t flag_t; typedef FlagField< flag_t > FlagField_T; - +typedef cuda::GPUField<double> GPUField; int main( int argc, char ** argv ) { walberla::Environment walberlaEnv( argc, argv ); - + cuda::selectDeviceBasedOnMpiRank(); auto blocks = blockforest::createUniformBlockGridFromConfig( walberlaEnv.config() ); // read parameters @@ -67,12 +75,26 @@ int main( int argc, char ** argv ) const double remainingTimeLoggerFrequency = parameters.getParameter< double >( "remainingTimeLoggerFrequency", 3.0 ); // in seconds // create fields - BlockDataID forceFieldId = field::addToStorage<VectorField_T>( blocks, "Force", real_t( 0.0 )); BlockDataID velFieldId = field::addToStorage<VectorField_T>( blocks, "Velocity", real_t( 0.0 )); - BlockDataID omegaFieldId = field::addToStorage<ScalarField_T>( blocks, "Omega", real_t( 0.0 )); + BlockDataID velFieldIdGPU = cuda::addGPUFieldToStorage<VectorField_T >( blocks, velFieldId, "Velocity field on GPU", true ); + + // the generated latticeModel is for GPU usage only. Thus all input data has to be allocated on GPU + LatticeModel_T latticeModel = LatticeModel_T( velFieldIdGPU, omega ); + + // LatticeModel_T latticeModel = LatticeModel_T( velFieldIdGPU, omega ); + // BlockDataID pdfFieldId = field::addToStorage<Test_T>(blocks, "pdf field", real_t(0), field::fzyx); + BlockDataID pdfFieldId = lbm::addPdfFieldToStorage( blocks, "pdf field", latticeModel, initialVelocity, real_t(1), field::fzyx ); + BlockDataID pdfFieldIdGPU = cuda::addGPUPdfFieldToStorage<PdfField_T, LatticeModel_T>( blocks, pdfFieldId, latticeModel, true, "PDF field on GPU" ); + + + pystencils::PDF_Setter pdf_setter(pdfFieldIdGPU); + + WALBERLA_LOG_INFO_ON_ROOT("initialization of the distributions") + for (auto &block : *blocks) + { + pdf_setter(&block); + } - LatticeModel_T latticeModel = LatticeModel_T( forceFieldId, omegaFieldId, velFieldId, omega ); - BlockDataID pdfFieldId = lbm::addPdfFieldToStorage( blocks, "pdf field", latticeModel, initialVelocity, real_t(1) ); BlockDataID flagFieldId = field::addFlagFieldToStorage< FlagField_T >( blocks, "flag field" ); // create and initialize boundary handling @@ -81,8 +103,8 @@ int main( int argc, char ** argv ) auto boundariesConfig = walberlaEnv.config()->getOneBlock( "Boundaries" ); - lbm::LbCodeGenerationExample_UBB ubb(blocks, pdfFieldId); - lbm::LbCodeGenerationExample_NoSlip noSlip(blocks, pdfFieldId); + lbm::LbCodeGenerationExample_UBB ubb(blocks, pdfFieldIdGPU); + lbm::LbCodeGenerationExample_NoSlip noSlip(blocks, pdfFieldIdGPU); geometry::initBoundaryHandling<FlagField_T>(*blocks, flagFieldId, boundariesConfig); geometry::setNonBoundaryCellsToDomain<FlagField_T>(*blocks, flagFieldId, fluidFlagUID); @@ -94,31 +116,34 @@ int main( int argc, char ** argv ) SweepTimeloop timeloop( blocks->getBlockStorage(), timesteps ); // create communication for PdfField - blockforest::communication::UniformBufferedScheme< CommunicationStencil_T > communication( blocks ); - communication.addPackInfo( make_shared< lbm::PdfFieldPackInfo< LatticeModel_T > >( pdfFieldId ) ); + cuda::communication::UniformGPUScheme< Stencil_T > com(blocks, 0); + com.addPackInfo(make_shared< PackInfo_T >(pdfFieldIdGPU)); + auto communication = std::function< void() >([&]() { com.communicate(nullptr); }); // add LBM sweep and communication to time loop timeloop.add() << BeforeFunction( communication, "communication" ) << Sweep( noSlip, "noSlip boundary" ); timeloop.add() << Sweep( ubb, "ubb boundary" ); - timeloop.add() << Sweep( LatticeModel_T::Sweep( pdfFieldId ), "LB stream & collide" ); - - // LBM stability check - timeloop.addFuncAfterTimeStep( makeSharedFunctor( field::makeStabilityChecker< PdfField_T, FlagField_T >( walberlaEnv.config(), blocks, pdfFieldId, - flagFieldId, fluidFlagUID ) ), - "LBM stability check" ); + timeloop.add() << Sweep( LatticeModel_T::Sweep( pdfFieldIdGPU ), "LB stream & collide" ); // log remaining time timeloop.addFuncAfterTimeStep( timing::RemainingTimeLogger( timeloop.getNrOfTimeSteps(), remainingTimeLoggerFrequency ), "remaining time logger" ); // add VTK output to time loop - lbm::VTKOutput< LatticeModel_T, FlagField_T >::addToTimeloop( timeloop, blocks, walberlaEnv.config(), pdfFieldId, flagFieldId, fluidFlagUID ); - - // create adaptors, so that the GUI also displays density and velocity - // adaptors are like fields with the difference that they do not store values - // but calculate the values based on other fields ( here the PdfField ) - field::addFieldAdaptor<lbm::Adaptor<LatticeModel_T>::Density> ( blocks, pdfFieldId, "DensityAdaptor" ); - field::addFieldAdaptor<lbm::Adaptor<LatticeModel_T>::VelocityVector>( blocks, pdfFieldId, "VelocityAdaptor" ); + uint_t vtkWriteFrequency = parameters.getParameter<uint_t>("VTKwriteFrequency", 0); + if (vtkWriteFrequency > 0) + { + auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out", + "simulation_step", false, true, true, false, 0); + vtkOutput->addBeforeFunction( [&]() { + cuda::fieldCpy<VectorField_T, GPUField>( blocks, velFieldId, velFieldIdGPU ); + }); + auto Writer = make_shared<field::VTKWriter<VectorField_T>>(velFieldId, "Velocity"); + vtkOutput->addCellDataWriter(Writer); + + timeloop.addFuncBeforeTimeStep(vtk::writeFiles(vtkOutput), "VTK Output"); + } + // lbm::VTKOutput< LatticeModel_T, FlagField_T >::addToTimeloop( timeloop, blocks, walberlaEnv.config(), pdfFieldId, flagFieldId, fluidFlagUID ); timeloop.run(); diff --git a/tests/cuda/codegen/LbCodeGenerationExampleGPU.prm b/tests/cuda/codegen/LbCodeGenerationExampleGPU.prm index f233188ac..e79c716eb 100644 --- a/tests/cuda/codegen/LbCodeGenerationExampleGPU.prm +++ b/tests/cuda/codegen/LbCodeGenerationExampleGPU.prm @@ -4,7 +4,7 @@ Parameters omega 1.8; timesteps 500; - useGui 0; + VTKwriteFrequency 100; remainingTimeLoggerFrequency 3; // in seconds } diff --git a/tests/cuda/codegen/LbCodeGenerationExampleGPU.py b/tests/cuda/codegen/LbCodeGenerationExampleGPU.py index 3c01f0ae0..fb699422a 100644 --- a/tests/cuda/codegen/LbCodeGenerationExampleGPU.py +++ b/tests/cuda/codegen/LbCodeGenerationExampleGPU.py @@ -1,23 +1,42 @@ import sympy as sp import pystencils as ps -from lbmpy.creationfunctions import create_lb_collision_rule +from lbmpy.stencils import get_stencil +from lbmpy.creationfunctions import create_lb_method, create_lb_collision_rule, create_lb_update_rule from lbmpy.boundaries import NoSlip, UBB -from pystencils_walberla import CodeGeneration +from lbmpy.macroscopic_value_kernels import macroscopic_values_setter +from pystencils_walberla import CodeGeneration, generate_sweep, generate_pack_info_from_kernel from lbmpy_walberla import RefinementScaling, generate_boundary, generate_lattice_model with CodeGeneration() as ctx: - omega, omega_free = sp.symbols("omega, omega_free") - force_field, vel_field, omega_out = ps.fields("force(3), velocity(3), omega_out: [3D]", layout='zyxf') + stencil = get_stencil("D3Q19") - # the collision rule of the LB method where the some advanced features + dim = len(stencil[0]) + q = len(stencil) + + omega = sp.symbols("omega") + omega_free = 1 + vel_field = ps.fields(f"velocity({dim}): [3D]", layout='zyxf') + pdfs = ps.fields(f"pdfs({q}): [3D]", layout='zyxf') + + lb_method = create_lb_method(stencil=stencil, compressible=True, method='mrt', + relaxation_rates=[omega, omega, omega_free, omega_free, omega_free, omega_free]) + + # the collision rule of the LB method collision_rule = create_lb_collision_rule( - stencil='D3Q19', compressible=True, - method='mrt', relaxation_rates=[omega, omega, omega_free, omega_free, omega_free, omega_free], + lb_method=lb_method, + output={'velocity': vel_field}, optimization={'cse_global': True} ) + update_rule = create_lb_update_rule(collision_rule=collision_rule) + + setter_assignments = macroscopic_values_setter(lb_method, velocity=[0]*dim, pdfs=pdfs.center_vector, density=1) + # generate lattice model and (optionally) boundary conditions # for CPU simulations waLBerla's internal boundary handling can be used as well generate_lattice_model(ctx, 'LbCodeGenerationExample_LatticeModel', collision_rule, target='gpu') + generate_sweep(ctx, 'PDF_Setter', setter_assignments, target='gpu') generate_boundary(ctx, 'LbCodeGenerationExample_UBB', UBB([0.05, 0, 0]), collision_rule.method, target='gpu') generate_boundary(ctx, 'LbCodeGenerationExample_NoSlip', NoSlip(), collision_rule.method, target='gpu') + + generate_pack_info_from_kernel(ctx, "PackInfo", update_rule, target='gpu') diff --git a/tests/lbm/codegen/LbCodeGenerationExample.cpp b/tests/lbm/codegen/LbCodeGenerationExample.cpp index 665292f1b..8ff9abd38 100644 --- a/tests/lbm/codegen/LbCodeGenerationExample.cpp +++ b/tests/lbm/codegen/LbCodeGenerationExample.cpp @@ -54,27 +54,29 @@ typedef FlagField< flag_t > FlagField_T; int main( int argc, char ** argv ) { - walberla::Environment walberlaEnv( argc, argv ); + walberla::Environment walberlaEnv(argc, argv); - auto blocks = blockforest::createUniformBlockGridFromConfig( walberlaEnv.config() ); + auto blocks = blockforest::createUniformBlockGridFromConfig(walberlaEnv.config()); // read parameters - auto parameters = walberlaEnv.config()->getOneBlock( "Parameters" ); + auto parameters = walberlaEnv.config()->getOneBlock("Parameters"); - const real_t omega = parameters.getParameter< real_t > ( "omega", real_c( 1.4 ) ); - const Vector3<real_t> initialVelocity = parameters.getParameter< Vector3<real_t> >( "initialVelocity", Vector3<real_t>() ); - const uint_t timesteps = parameters.getParameter< uint_t > ( "timesteps", uint_c( 10 ) ); + const real_t omega = parameters.getParameter< real_t >("omega", real_c(1.4)); + const Vector3< real_t > initialVelocity = + parameters.getParameter< Vector3< real_t > >("initialVelocity", Vector3< real_t >()); + const uint_t timesteps = parameters.getParameter< uint_t >("timesteps", uint_c(10)); - const double remainingTimeLoggerFrequency = parameters.getParameter< double >( "remainingTimeLoggerFrequency", 3.0 ); // in seconds + const double remainingTimeLoggerFrequency = + parameters.getParameter< double >("remainingTimeLoggerFrequency", 3.0); // in seconds // create fields - BlockDataID forceFieldId = field::addToStorage<VectorField_T>( blocks, "Force", real_t( 0.0 )); - BlockDataID velFieldId = field::addToStorage<VectorField_T>( blocks, "Velocity", real_t( 0.0 )); - BlockDataID omegaFieldId = field::addToStorage<ScalarField_T>( blocks, "Omega", real_t( 0.0 )); + BlockDataID forceFieldId = field::addToStorage< VectorField_T >(blocks, "Force", real_t(0.0)); + BlockDataID velFieldId = field::addToStorage< VectorField_T >(blocks, "Velocity", real_t(0.0)); + BlockDataID omegaFieldId = field::addToStorage< ScalarField_T >(blocks, "Omega", real_t(0.0)); - LatticeModel_T latticeModel = LatticeModel_T( forceFieldId, omegaFieldId, velFieldId, omega ); - BlockDataID pdfFieldId = lbm::addPdfFieldToStorage( blocks, "pdf field", latticeModel, initialVelocity, real_t(1) ); - BlockDataID flagFieldId = field::addFlagFieldToStorage< FlagField_T >( blocks, "flag field" ); + LatticeModel_T latticeModel = LatticeModel_T(forceFieldId, omegaFieldId, velFieldId, omega); + BlockDataID pdfFieldId = lbm::addPdfFieldToStorage(blocks, "pdf field", latticeModel, initialVelocity, real_t(1)); + BlockDataID flagFieldId = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field"); // create and initialize boundary handling const FlagUID fluidFlagUID( "Fluid" ); -- GitLab