From c65cee1db07508c27a780ece0558c10684b4acb1 Mon Sep 17 00:00:00 2001 From: Frederik Hennig <frederik.hennig@fau.de> Date: Mon, 17 Mar 2025 14:45:45 +0100 Subject: [PATCH] Introduce MemTag. Use it in SparseIndexList. Adapt codegen to use entities and views. - Introduce the MemTag protocol (from my master's thesis) - Implement CUDA and HIP unified memory allocator - Adapt SparseIndexList to use memtags and select allocators according to them - Adapt codegen to use the entity/view pattern for all fields such that the block data ID of the SparseIndexList is now fully obscured - Make FreeSlipPipe test case run on GPU --- lib/CMakeLists.txt | 3 + lib/walberla/experimental/Memory.hpp | 4 + .../experimental/lbm/IrregularFreeSlip.hpp | 340 +++++++++--------- .../experimental/memory/MemoryTags.hpp | 68 ++++ .../memory/UnifiedMemoryAllocator.hpp | 49 +++ .../experimental/sweep/SparseIndexList.hpp | 149 ++++---- src/walberla/codegen/api.py | 56 ++- src/walberla/codegen/boundaries/hbb.py | 12 +- src/walberla/codegen/boundaries/linkwise.py | 8 +- src/walberla/codegen/sweep.py | 139 ++++--- tests/BasicLbmScenarios/SimDomain.hpp | 9 + .../TestBasicLbmScenarios.cpp | 31 +- 12 files changed, 550 insertions(+), 318 deletions(-) create mode 100644 lib/walberla/experimental/Memory.hpp create mode 100644 lib/walberla/experimental/memory/MemoryTags.hpp create mode 100644 lib/walberla/experimental/memory/UnifiedMemoryAllocator.hpp diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 15a074a..7619333 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -3,9 +3,12 @@ add_library( walberla::experimental ALIAS walberla_experimental ) target_sources( walberla_experimental INTERFACE + walberla/experimental/memory/UnifiedMemoryAllocator.hpp walberla/experimental/sweep/SparseIndexList.hpp walberla/experimental/lbm/GenericHbbBoundary.hpp walberla/experimental/lbm/IrregularFreeSlip.hpp ) +target_link_libraries( walberla_experimental INTERFACE walberla_core walberla_gpu ) + target_include_directories( walberla_experimental INTERFACE ${CMAKE_CURRENT_SOURCE_DIR} ) diff --git a/lib/walberla/experimental/Memory.hpp b/lib/walberla/experimental/Memory.hpp new file mode 100644 index 0000000..877249e --- /dev/null +++ b/lib/walberla/experimental/Memory.hpp @@ -0,0 +1,4 @@ +#pragma once + +#include "./memory/MemoryTags.hpp" +#include "./memory/UnifiedMemoryAllocator.hpp" diff --git a/lib/walberla/experimental/lbm/IrregularFreeSlip.hpp b/lib/walberla/experimental/lbm/IrregularFreeSlip.hpp index 3332d8a..0b9097f 100644 --- a/lib/walberla/experimental/lbm/IrregularFreeSlip.hpp +++ b/lib/walberla/experimental/lbm/IrregularFreeSlip.hpp @@ -2,182 +2,186 @@ #include "core/DataTypes.h" #include "core/cell/Cell.h" + #include "domain_decomposition/BlockDataID.h" #include "domain_decomposition/IBlock.h" -#include "field/GhostLayerField.h" + #include "field/FlagField.h" -#include "walberla/experimental/sweep/SparseIndexList.hpp" +#include "field/GhostLayerField.h" + #include "stencil/Directions.h" + +#include <memory> #include <tuple> +#include <type_traits> + +#include "walberla/experimental/Memory.hpp" +#include "walberla/experimental/sweep/SparseIndexList.hpp" namespace walberla::experimental::lbm { - struct IrregularFreeSlipLinkInfo - { - int32_t x; - int32_t y; - int32_t z; - int32_t dir; - int32_t source_offset_x; - int32_t source_offset_y; - int32_t source_offset_z; - int32_t source_dir; - IrregularFreeSlipLinkInfo( - walberla::Cell fluidCell, walberla::stencil::Direction linkDir, - walberla::Cell sourceCell, walberla::stencil::Direction sourceDir) - : x{fluidCell.x()}, y{fluidCell.y()}, z{fluidCell.z()}, - dir{int32_t(linkDir)}, source_offset_x{sourceCell.x() - fluidCell.x()}, - source_offset_y{sourceCell.y() - fluidCell.y()}, source_offset_z{sourceCell.z() - fluidCell.z()}, - source_dir{int32_t(sourceDir)} {} - - bool operator==(const IrregularFreeSlipLinkInfo &other) const - { - return std::tie(x, y, z, dir, source_offset_x, source_offset_y, source_offset_z, source_dir) == - std::tie(other.x, other.y, other.z, other.dir, other.source_offset_x, - other.source_offset_y, other.source_offset_z, other.source_dir); - } - }; - - namespace detail - { - template <typename Stencil, typename FlagField_T> - class FreeSlipLinksFromFlagField - { - public: - using IndexList = sweep::SparseIndexList<IrregularFreeSlipLinkInfo>; - using flag_t = typename FlagField_T::flag_t; - - FreeSlipLinksFromFlagField( - StructuredBlockForest &sbfs, - BlockDataID flagFieldID, - field::FlagUID boundaryFlagUID, - field::FlagUID fluidFlagUID) : sbfs_{sbfs}, flagFieldID_(flagFieldID), boundaryFlagUID_(boundaryFlagUID), fluidFlagUID_(fluidFlagUID) {} - - IndexList collectLinks() - { - IndexList indexList{sbfs_}; - - for (auto &block : sbfs_) - { - FlagField_T *flagField = block.getData<FlagField_T>(flagFieldID_); - flag_t boundaryFlag{flagField->getFlag(boundaryFlagUID_)}; - flag_t fluidFlag{flagField->getFlag(fluidFlagUID_)}; - - auto &idxVector = indexList.get(block); - - for (auto it = flagField->beginXYZ(); it != flagField->end(); ++it) - { - Cell c{it.cell()}; - if (!isFlagSet(it, fluidFlag)) - { - continue; - } - - for (auto dIt = Stencil::beginNoCenter(); dIt != Stencil::end(); ++dIt) - { - stencil::Direction dir{*dIt}; - Cell neighbor{c + dir}; - if (flagField->isFlagSet(neighbor, boundaryFlag)) - { - idxVector.emplace_back(createLink(flagField, c, dir)); - } - } - } - } - - return indexList; - } +struct IrregularFreeSlipLinkInfo +{ + int32_t x; + int32_t y; + int32_t z; + int32_t dir; + int32_t source_offset_x; + int32_t source_offset_y; + int32_t source_offset_z; + int32_t source_dir; + IrregularFreeSlipLinkInfo(walberla::Cell fluidCell, walberla::stencil::Direction linkDir, walberla::Cell sourceCell, + walberla::stencil::Direction sourceDir) + : x{ fluidCell.x() }, y{ fluidCell.y() }, z{ fluidCell.z() }, dir{ int32_t(linkDir) }, + source_offset_x{ sourceCell.x() - fluidCell.x() }, source_offset_y{ sourceCell.y() - fluidCell.y() }, + source_offset_z{ sourceCell.z() - fluidCell.z() }, source_dir{ int32_t(sourceDir) } + {} + + bool operator==(const IrregularFreeSlipLinkInfo& other) const + { + return std::tie(x, y, z, dir, source_offset_x, source_offset_y, source_offset_z, source_dir) == + std::tie(other.x, other.y, other.z, other.dir, other.source_offset_x, other.source_offset_y, + other.source_offset_z, other.source_dir); + } +}; + +namespace detail +{ +template< typename Stencil, typename FlagField_T, typename IndexList_T > +class FreeSlipLinksFromFlagField +{ + public: + using flag_t = typename FlagField_T::flag_t; + + FreeSlipLinksFromFlagField(StructuredBlockForest& sbfs, BlockDataID flagFieldID, field::FlagUID boundaryFlagUID, + field::FlagUID fluidFlagUID) + : sbfs_{ sbfs }, flagFieldID_(flagFieldID), boundaryFlagUID_(boundaryFlagUID), fluidFlagUID_(fluidFlagUID) + {} + + IndexList_T collectLinks() + { + IndexList_T indexList{ sbfs_ }; + + for (auto& block : sbfs_) + { + FlagField_T* flagField = block.getData< FlagField_T >(flagFieldID_); + flag_t boundaryFlag{ flagField->getFlag(boundaryFlagUID_) }; + flag_t fluidFlag{ flagField->getFlag(fluidFlagUID_) }; - private: - IrregularFreeSlipLinkInfo createLink(FlagField_T *flagField, const Cell &fluidCell, const stencil::Direction dir) + auto& idxVector = indexList.getVector(block); + + for (auto it = flagField->beginXYZ(); it != flagField->end(); ++it) + { + Cell c{ it.cell() }; + if (!isFlagSet(it, fluidFlag)) { continue; } + + for (auto dIt = Stencil::beginNoCenter(); dIt != Stencil::end(); ++dIt) { - const flag_t fluidFlag{flagField->getFlag(fluidFlagUID_)}; - const Cell wallCell{fluidCell + dir}; - - // inverse direction of 'dir' as lattice vector - - const cell_idx_t ix = stencil::cx[stencil::inverseDir[dir]]; - const cell_idx_t iy = stencil::cy[stencil::inverseDir[dir]]; - const cell_idx_t iz = stencil::cz[stencil::inverseDir[dir]]; - - stencil::Direction sourceDir = stencil::inverseDir[dir]; // compute reflected (mirrored) of inverse direction of 'dir' - - cell_idx_t wnx = 0; // compute "normal" vector of free slip wall - cell_idx_t wny = 0; - cell_idx_t wnz = 0; - - if (flagField->isFlagSet(wallCell.x() + ix, wallCell.y(), wallCell.z(), fluidFlag)) - { - wnx = ix; - sourceDir = stencil::mirrorX[sourceDir]; - } - if (flagField->isFlagSet(wallCell.x(), wallCell.y() + iy, wallCell.z(), fluidFlag)) - { - wny = iy; - sourceDir = stencil::mirrorY[sourceDir]; - } - if (flagField->isFlagSet(wallCell.x(), wallCell.y(), wallCell.z() + iz, fluidFlag)) - { - wnz = iz; - sourceDir = stencil::mirrorZ[sourceDir]; - } - - // concave corner (neighbors are non-fluid) - if (wnx == 0 && wny == 0 && wnz == 0) - { - wnx = ix; - wny = iy; - wnz = iz; - sourceDir = dir; - } - - const Cell sourceCell{ - wallCell.x() + wnx, - wallCell.y() + wny, - wallCell.z() + wnz}; - - return { - fluidCell, - dir, - sourceCell, - sourceDir}; + stencil::Direction dir{ *dIt }; + Cell neighbor{ c + dir }; + if (flagField->isFlagSet(neighbor, boundaryFlag)) + { + idxVector.emplace_back(createLink(flagField, c, dir)); + } } - - StructuredBlockForest &sbfs_; - const BlockDataID flagFieldID_; - const field::FlagUID boundaryFlagUID_; - const field::FlagUID fluidFlagUID_; - }; - } - - template <typename Impl> - class IrregularFreeSlipFactory - { - public: - IrregularFreeSlipFactory(const shared_ptr<StructuredBlockForest> blocks, BlockDataID pdfFieldID) - : blocks_{blocks}, pdfFieldID_{pdfFieldID} {} - - template <typename FlagField_T> - auto fromFlagField(BlockDataID flagFieldID, field::FlagUID boundaryFlagUID, field::FlagUID fluidFlagUID) - { - detail::FreeSlipLinksFromFlagField<typename Impl::Stencil, FlagField_T> linksFromFlagField{*blocks_, flagFieldID, boundaryFlagUID, fluidFlagUID}; - auto indexVector = linksFromFlagField.collectLinks(); - return impl().irregularFromIndexVector(indexVector); - } - - protected: - shared_ptr<StructuredBlockForest> blocks_; - BlockDataID pdfFieldID_; - - private: - Impl &impl() - { - return *static_cast<Impl *>(this); - } - - const Impl &impl() const - { - return *static_cast<const Impl *>(this); - } - }; -} + } + } + + return indexList; + } + + private: + IrregularFreeSlipLinkInfo createLink(FlagField_T* flagField, const Cell& fluidCell, const stencil::Direction dir) + { + const flag_t fluidFlag{ flagField->getFlag(fluidFlagUID_) }; + const Cell wallCell{ fluidCell + dir }; + + // inverse direction of 'dir' as lattice vector + + const cell_idx_t ix = stencil::cx[stencil::inverseDir[dir]]; + const cell_idx_t iy = stencil::cy[stencil::inverseDir[dir]]; + const cell_idx_t iz = stencil::cz[stencil::inverseDir[dir]]; + + stencil::Direction sourceDir = + stencil::inverseDir[dir]; // compute reflected (mirrored) of inverse direction of 'dir' + + cell_idx_t wnx = 0; // compute "normal" vector of free slip wall + cell_idx_t wny = 0; + cell_idx_t wnz = 0; + + if (flagField->isFlagSet(wallCell.x() + ix, wallCell.y(), wallCell.z(), fluidFlag)) + { + wnx = ix; + sourceDir = stencil::mirrorX[sourceDir]; + } + if (flagField->isFlagSet(wallCell.x(), wallCell.y() + iy, wallCell.z(), fluidFlag)) + { + wny = iy; + sourceDir = stencil::mirrorY[sourceDir]; + } + if (flagField->isFlagSet(wallCell.x(), wallCell.y(), wallCell.z() + iz, fluidFlag)) + { + wnz = iz; + sourceDir = stencil::mirrorZ[sourceDir]; + } + + // concave corner (neighbors are non-fluid) + if (wnx == 0 && wny == 0 && wnz == 0) + { + wnx = ix; + wny = iy; + wnz = iz; + sourceDir = dir; + } + + const Cell sourceCell{ wallCell.x() + wnx, wallCell.y() + wny, wallCell.z() + wnz }; + + return { fluidCell, dir, sourceCell, sourceDir }; + } + + StructuredBlockForest& sbfs_; + const BlockDataID flagFieldID_; + const field::FlagUID boundaryFlagUID_; + const field::FlagUID fluidFlagUID_; +}; +} // namespace detail + +template< typename Impl > +struct IrregularFreeSlipFactoryImplTraits +{ + using Stencil_T = typename Impl::Stencil; + using IdxStruct_T = IrregularFreeSlipLinkInfo; + using IndexListMemTag_T = typename Impl::memtag_t; + using IndexList_T = sweep::SparseIndexList< IrregularFreeSlipLinkInfo, IndexListMemTag_T >; +}; + +template< typename Impl > +class IrregularFreeSlipFactory +{ + using ImplTraits = IrregularFreeSlipFactoryImplTraits< Impl >; + + public: + IrregularFreeSlipFactory(const shared_ptr< StructuredBlockForest > blocks, BlockDataID pdfFieldID) + : blocks_{ blocks }, pdfFieldID_{ pdfFieldID } + {} + + template< typename FlagField_T > + auto fromFlagField(BlockDataID flagFieldID, field::FlagUID boundaryFlagUID, field::FlagUID fluidFlagUID) + { + detail::FreeSlipLinksFromFlagField< typename ImplTraits::Stencil_T, FlagField_T, typename ImplTraits::IndexList_T > linksFromFlagField{ + *blocks_, flagFieldID, boundaryFlagUID, fluidFlagUID + }; + auto indexVector = linksFromFlagField.collectLinks(); + return impl().irregularFromIndexVector(indexVector); + } + + protected: + shared_ptr< StructuredBlockForest > blocks_; + BlockDataID pdfFieldID_; + + private: + Impl& impl() { return *static_cast< Impl* >(this); } + + const Impl& impl() const { return *static_cast< const Impl* >(this); } +}; +} // namespace walberla::experimental::lbm diff --git a/lib/walberla/experimental/memory/MemoryTags.hpp b/lib/walberla/experimental/memory/MemoryTags.hpp new file mode 100644 index 0000000..01c80e2 --- /dev/null +++ b/lib/walberla/experimental/memory/MemoryTags.hpp @@ -0,0 +1,68 @@ +#pragma once + +#include <concepts> +#include <memory> + +#include "./UnifiedMemoryAllocator.hpp" + +namespace walberla::experimental::memory +{ + +namespace memtag +{ + +/** + * @brief Common base class for all memory tags. + */ +struct _mem_tag +{}; + +/** + * @brief Regular host memory tag. + */ +struct host : public _mem_tag +{}; +inline host host_v; + +/** + * @brief Memory tag indicating managed GPU memory with unified memory semantics. + */ +struct unified : public _mem_tag +{}; +inline unified unified_v; + +} // namespace memtag + +template< typename T > +concept MemTag = std::derived_from< T, memtag::_mem_tag >; + +namespace detail +{ +template< MemTag memtag_t > +struct SelectStandardAllocator; + +template<> +struct SelectStandardAllocator< memtag::host > +{ + template< typename T > + using allocator = std::allocator< T >; +}; + +template<> +struct SelectStandardAllocator< memtag::unified > +{ + template< typename T > + using allocator = UnifiedMemoryAllocator< T >; +}; +} // namespace detail + +/** + * @brief Standard allocator implementation for a given memory tag and value type. + * + * This typedef alias references a type implementing the Allocator named requirement for type T + * which implements the memory semantics required by the given memory tag. + */ +template< MemTag memtag_t, typename T > +using StandardAllocator_T = detail::SelectStandardAllocator< memtag_t >::template allocator< T >; + +} // namespace walberla::experimental::memory diff --git a/lib/walberla/experimental/memory/UnifiedMemoryAllocator.hpp b/lib/walberla/experimental/memory/UnifiedMemoryAllocator.hpp new file mode 100644 index 0000000..e30afec --- /dev/null +++ b/lib/walberla/experimental/memory/UnifiedMemoryAllocator.hpp @@ -0,0 +1,49 @@ +#pragma once + +#include "waLBerlaDefinitions.h" +#include "gpu/GPUWrapper.h" +#include "gpu/ErrorChecking.h" + +namespace walberla::experimental::memory +{ + +#if defined(WALBERLA_BUILD_WITH_CUDA) || defined(WALBERLA_BUILD_WITH_HIP) + +/** + * @brief Allocator for managed GPU memory. + * + * This allocator implementes the Allocator named requirement and + * manages allocation and disposal of memory blocks with unified + * memory semantics, using the CUDA or HIP runtime (depending on which is active). + */ +template< typename T > +class UnifiedMemoryAllocator +{ + public: + using value_type = T; + using pointer_type = T *; + + + pointer_type allocate(size_t n) { + pointer_type ptr; + size_t bytes { n * sizeof(value_type) }; + WALBERLA_GPU_CHECK( + gpuMallocManaged(&ptr, bytes) + ); + return ptr; + } + + void deallocate(pointer_type ptr, size_t n) { + WALBERLA_GPU_CHECK( + gpuFree(ptr) + ); + } + + bool operator== (const UnifiedMemoryAllocator< value_type > &) { + return true; + } +}; + +#endif + +} // namespace walberla::experimental::memory diff --git a/lib/walberla/experimental/sweep/SparseIndexList.hpp b/lib/walberla/experimental/sweep/SparseIndexList.hpp index 6fbc30e..c94436e 100644 --- a/lib/walberla/experimental/sweep/SparseIndexList.hpp +++ b/lib/walberla/experimental/sweep/SparseIndexList.hpp @@ -1,80 +1,89 @@ #pragma once -#include <vector> +#include "blockforest/StructuredBlockForest.h" + #include "core/DataTypes.h" #include "core/cell/Cell.h" #include "domain_decomposition/BlockDataID.h" -#include "blockforest/StructuredBlockForest.h" + +#include <type_traits> +#include <vector> +#include <span> + +#include "../memory/MemoryTags.hpp" namespace walberla::experimental::sweep { - namespace internal - { - /** - * @brief Index vectors for sparse sweeps - */ - template <typename IndexStruct> - class IndexListBuffer - { - public: - using CpuVector = std::vector<IndexStruct>; - - IndexListBuffer() = default; - bool operator==(const IndexListBuffer &other) const { return other.vec_ == vec_; } - - CpuVector &vector() { return vec_; } - - IndexStruct *pointerCpu() { return vec_.data(); } - - private: - CpuVector vec_; - }; - } - - struct CellIdx - { - int64_t x; - int64_t y; - int64_t z; - - CellIdx(const Cell &c) - : x{int64_c(c.x())}, y{int64_c(c.y())}, z{int64_c(c.z())} - { - } - - bool operator==(const CellIdx &other) const - { - return std::tie(x, y, z) == std::tie(other.x, other.y, other.z); - } - }; - - template <typename IndexStruct = CellIdx> - class SparseIndexList - { - public: - using Buffer_T = internal::IndexListBuffer<IndexStruct>; - - explicit SparseIndexList(StructuredBlockForest &sbfs) : bufferId_{createBuffers(sbfs)} {} - - std::vector<IndexStruct> &get(IBlock &block) - { - Buffer_T *buf = block.template getData<Buffer_T>(bufferId_); +namespace internal +{ +/** + * @brief Index vectors for sparse sweeps + */ +template< typename IndexStruct, typename Allocator_T > +class IndexListBuffer +{ + public: + using Vector_T = std::vector< IndexStruct, Allocator_T >; + + IndexListBuffer() = default; + bool operator==(const IndexListBuffer& other) const { return other.vec_ == vec_; } + + Vector_T& vector() { return vec_; } + + IndexStruct* data() { return vec_.data(); } + const IndexStruct* data() const { return vec_.data(); } + size_t size() const { return vec_.size(); } + + private: + Vector_T vec_; +}; +} // namespace internal + +struct CellIdx +{ + int64_t x; + int64_t y; + int64_t z; + + CellIdx(const Cell& c) : x{ int64_c(c.x()) }, y{ int64_c(c.y()) }, z{ int64_c(c.z()) } {} + + bool operator==(const CellIdx& other) const { return std::tie(x, y, z) == std::tie(other.x, other.y, other.z); } +}; + +template< typename IndexStruct = CellIdx, memory::MemTag memtag_t = memory::memtag::host > +class SparseIndexList +{ + public: + using Buffer_T = internal::IndexListBuffer< IndexStruct, memory::StandardAllocator_T< memtag_t, IndexStruct > >; + + explicit SparseIndexList(StructuredBlockForest& sbfs) : bufferId_{ createBuffers(sbfs) } {} + + Buffer_T::Vector_T& getVector(IBlock& block) + { + Buffer_T* buf = block.template getData< Buffer_T >(bufferId_); return buf->vector(); - } - - const BlockDataID & bufferId() const { - return bufferId_; - } - - private: - static BlockDataID createBuffers(StructuredBlockForest &sbfs) - { - auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) - { return new Buffer_T(); }; - return sbfs.addStructuredBlockData<Buffer_T>(createIdxVector); - } - - BlockDataID bufferId_; - }; -} + } + + std::span< IndexStruct > view(IBlock& block){ + Buffer_T& buf = *block.template getData< Buffer_T >(bufferId_); + return { buf.data(), buf.size() }; + } + + std::span< const IndexStruct > view(IBlock& block) const { + const Buffer_T& buf = *block.template getData< Buffer_T >(bufferId_); + return { buf.data(), buf.size() }; + } + + const BlockDataID& bufferId() const { return bufferId_; } + + private: + static BlockDataID createBuffers(StructuredBlockForest& sbfs) + { + auto createIdxVector = [](IBlock* const, StructuredBlockStorage* const) { return new Buffer_T(); }; + return sbfs.addStructuredBlockData< Buffer_T >(createIdxVector); + } + + BlockDataID bufferId_; +}; +} // namespace walberla::experimental::sweep diff --git a/src/walberla/codegen/api.py b/src/walberla/codegen/api.py index 53915c9..46d47c6 100644 --- a/src/walberla/codegen/api.py +++ b/src/walberla/codegen/api.py @@ -2,7 +2,7 @@ from __future__ import annotations from typing import cast -from pystencils import Field, DynamicType +from pystencils import Field, DynamicType, FieldType, Target from pystencils.types import ( UserTypeSpec, create_type, @@ -128,8 +128,8 @@ class IBlockPtr(_PlainCppClass): def getAABB(self) -> AABB: return AABB().bind("{}->getAABB()", self) - def deref(self) -> AugExpr: - return AugExpr.format("*{}", self) + def deref(self) -> IBlock: + return IBlock(ref=True).bind("*{}", self) class SharedPtr(CppClass): @@ -355,25 +355,65 @@ def glfield(field: Field, ci: str | AugExpr | None = None): return GhostLayerFieldExtraction(field_ptr, ci) +class MemTags: + _header = "walberla/experimental/Memory.hpp" + + host = cpptype("walberla::experimental::memory::memtag::host", _header)() + unified = cpptype("walberla::experimental::memory::memtag::unified", _header)() + + class SparseIndexList(AugExpr): _template = cpptype( - "walberla::experimental::sweep::SparseIndexList< {IndexStruct} >", + "walberla::experimental::sweep::SparseIndexList< {IndexStruct}, {memtag_t} >", "walberla/experimental/sweep/SparseIndexList.hpp", ) def __init__( - self, idx_struct: PsStructType, const: bool = False, ref: bool = False + self, + idx_struct: PsStructType, + memtag_t: PsType = MemTags.host, + const: bool = False, + ref: bool = False, ): self._idx_struct = idx_struct - dtype = self._template(IndexStruct=idx_struct, const=const, ref=ref) + dtype = self._template( + IndexStruct=idx_struct, memtag_t=memtag_t, const=const, ref=ref + ) super().__init__(dtype) - def get(self, block: IBlock) -> std.vector: - return std.vector(self._idx_struct, ref=True).bind("{}.get({})", self, block) + def view_type(self) -> std.span: + return std.span(self._idx_struct) + + def view(self, block: IBlock) -> std.span: + return std.span(self._idx_struct).bind("{}.view({})", self, block) def bufferId(self) -> BlockDataID: return BlockDataID().bind("{}.bufferId()", self) + @staticmethod + def from_field( + field: Field, + target: Target | None = None, + const: bool = False, + ref: bool = False, + ): + if ( + not isinstance(field.dtype, PsStructType) + or field.field_type != FieldType.INDEXED + ): + raise ValueError( + "Can only create instances of SparseIndexList from index fields holding structures." + ) + + if target is not None and target.is_gpu(): + memtag = MemTags.unified + else: + memtag = MemTags.host + + return SparseIndexList(field.dtype, memtag_t=memtag, const=const, ref=ref).var( + field.name + ) + class IndexListBufferPtr(AugExpr, SupportsFieldExtraction): _template = cpptype( diff --git a/src/walberla/codegen/boundaries/hbb.py b/src/walberla/codegen/boundaries/hbb.py index 3e8f7a2..c249667 100644 --- a/src/walberla/codegen/boundaries/hbb.py +++ b/src/walberla/codegen/boundaries/hbb.py @@ -12,7 +12,7 @@ from lbmpy.boundaries.boundaryconditions import LbBoundary from lbmpy.advanced_streaming.indexing import Timestep from ..sweep import ( - FieldInfo, + GlFieldInfo, SweepClassProperties, BlockforestParameters, combine_vectors, @@ -55,9 +55,9 @@ class SimpleHbbBoundary(CustomGenerator): khandle = knamespace.add(bc_kernel, self._name) - domain_fields: list[FieldInfo] = sorted( + domain_fields: list[GlFieldInfo] = sorted( ( - FieldInfo( + GlFieldInfo( f, GhostLayerFieldPtr.create(f), BlockDataID().var(f"{f.name}Id") ) for f in khandle.fields @@ -98,13 +98,13 @@ class SimpleHbbBoundary(CustomGenerator): ), # Get fields from block *( - sfg.init(fi.wlb_field_ptr)( - block.getData(fi.wlb_field_ptr.field_type, fi.data_id) + sfg.init(fi.wlb_field)( + block.getData(fi.wlb_field.field_type, fi.data_id) ) for fi in domain_fields ), # Map GhostLayerFields to pystencils fields - *(sfg.map_field(fi.field, fi.wlb_field_ptr) for fi in domain_fields), + *(sfg.map_field(fi.field, fi.wlb_field) for fi in domain_fields), # Get index vector sfg.init(idx_vector)( block.getData(idx_vector.field_type, idx_vector_id) diff --git a/src/walberla/codegen/boundaries/linkwise.py b/src/walberla/codegen/boundaries/linkwise.py index 4459a59..a82e9b4 100644 --- a/src/walberla/codegen/boundaries/linkwise.py +++ b/src/walberla/codegen/boundaries/linkwise.py @@ -16,7 +16,7 @@ from pystencilssfg.composer.custom import CustomGenerator from .hbb import BoundaryIndexType from .boundary_utils import WalberlaLbmBoundary from ..sweep import Sweep -from ..api import SparseIndexList +from ..api import SparseIndexList, MemTags __all__ = ["FreeSlip"] @@ -175,14 +175,15 @@ class FreeSlip(GenericLinkwiseBoundary): factory_crtp_base = ( f"walberla::experimental::lbm::IrregularFreeSlipFactory< {factory_name} >" ) + memtag_t = MemTags.unified if bc_cfg.get_target().is_gpu() else MemTags.host index_vector = SparseIndexList( - WalberlaIrregularFreeSlip.idx_struct_type, ref=True + WalberlaIrregularFreeSlip.idx_struct_type, memtag_t=memtag_t, ref=True ).var("indexVector") sweep_type = bc_sweep.generated_class() sweep_ctor_args = { f"{self._pdf_field.name}Id": "this->pdfFieldID_", - f"{index_field.name}Id": index_vector.bufferId(), + f"{index_field.name}": index_vector, } stencil_name = self._lb_method.stencil.name @@ -194,6 +195,7 @@ class FreeSlip(GenericLinkwiseBoundary): f"friend class {factory_crtp_base};", f"using Stencil = walberla::stencil::{stencil_name};", f"using Sweep = {sweep_type.get_dtype().c_string()};", + f"using memtag_t = {memtag_t.c_string()};" "using Base::Base;", ), sfg.private( diff --git a/src/walberla/codegen/sweep.py b/src/walberla/codegen/sweep.py index 7239ff6..49cbcfe 100644 --- a/src/walberla/codegen/sweep.py +++ b/src/walberla/codegen/sweep.py @@ -4,6 +4,7 @@ from typing import Sequence, Iterable, cast from dataclasses import dataclass from itertools import chain from collections import defaultdict +from abc import ABC, abstractmethod import warnings import sympy as sp @@ -30,8 +31,10 @@ from pystencilssfg.lang import ( AugExpr, Ref, strip_ptr_ref, + SupportsFieldExtraction, SupportsVectorExtraction, ) +from pystencilssfg.lang.cpp import std from pystencilssfg.ir import SfgKernelHandle, SfgCallTreeNode from .build_config import WalberlaBuildConfig from .reflection import GeneratedClassWrapperBase @@ -41,6 +44,7 @@ from .api import ( GhostLayerFieldPtr, GpuFieldPtr, IndexListBufferPtr, + SparseIndexList, IBlockPtr, BlockDataID, Vector2, @@ -258,28 +262,57 @@ class BlockforestParameters: @dataclass -class FieldInfo: +class FieldInfo(ABC): field: Field - wlb_field_ptr: GenericWalberlaField | IndexListBufferPtr + + @property + @abstractmethod + def entity(self) -> AugExpr: ... + + @property + @abstractmethod + def view(self) -> AugExpr: ... + + @abstractmethod + def create_view(self, block: IBlockPtr) -> AugExpr: ... + + def view_extraction(self) -> SupportsFieldExtraction: + assert isinstance(self.view, SupportsFieldExtraction) + return self.view + + +@dataclass +class GlFieldInfo(FieldInfo): + glfield: GenericWalberlaField data_id: BlockDataID - def as_glfield(self) -> GenericWalberlaField: - if not isinstance(self.wlb_field_ptr, GenericWalberlaField): - raise AttributeError( - "This FieldInfo does not encapsulate a ghost layer field" - ) - return self.wlb_field_ptr + @property + def entity(self) -> AugExpr: + return self.data_id - def is_glfield(self) -> bool: - return isinstance(self.wlb_field_ptr, GenericWalberlaField) + @property + def view(self) -> AugExpr: + return self.glfield + + def create_view(self, block: IBlockPtr) -> AugExpr: + return block.getData(self.glfield.field_type, self.data_id) - def as_index_list(self) -> IndexListBufferPtr: - if not isinstance(self.wlb_field_ptr, IndexListBufferPtr): - raise AttributeError("This FieldInfo does not encapsulate an index list") - return self.wlb_field_ptr - def is_index_list(self) -> bool: - return isinstance(self.wlb_field_ptr, IndexListBufferPtr) +@dataclass +class IndexListInfo(FieldInfo): + idx_list: SparseIndexList + idx_vector_view: std.span + + @property + def entity(self) -> AugExpr: + return self.idx_list + + @property + def view(self) -> AugExpr: + return self.idx_vector_view + + def create_view(self, block: IBlockPtr) -> AugExpr: + return self.idx_list.view(block.deref()) class ShadowFieldCache: @@ -290,20 +323,20 @@ class ShadowFieldCache: """ def __init__(self) -> None: - self._originals: dict[str, FieldInfo] = dict() + self._originals: dict[str, GlFieldInfo] = dict() - def add_shadow(self, original: FieldInfo): + def add_shadow(self, original: GlFieldInfo): self._originals[original.field.name] = original def get_shadow(self, original_name: str) -> AugExpr: original = self._originals[original_name] return AugExpr.format( - "this->{}({})", self._getter(original.field.name), original.wlb_field_ptr + "this->{}({})", self._getter(original.field.name), original.glfield ) - def perform_swap(self, original_name: str, shadow_field: FieldInfo) -> AugExpr: + def perform_swap(self, original_name: str, shadow_field: GlFieldInfo) -> AugExpr: original = self._originals[original_name] - return original.as_glfield().swapDataPointers(shadow_field.wlb_field_ptr) + return original.glfield.swapDataPointers(shadow_field.glfield) def render(self, sfg: SfgComposer): if not self._originals: @@ -316,20 +349,20 @@ class ShadowFieldCache: for orig_name, orig in self._originals.items(): unique_ptr_type = PsCustomType( - f"std::unique_ptr< {orig.as_glfield().field_type} >" + f"std::unique_ptr< {orig.glfield.field_type} >" ) - cache_ptr_name = self._cache_ptr(orig.as_glfield()) + cache_ptr_name = self._cache_ptr(orig.glfield) cache_ptrs.append(sfg.var(cache_ptr_name, unique_ptr_type)) getters.append( sfg.method(self._getter(orig_name)) - .returns(orig.wlb_field_ptr.get_dtype()) + .returns(orig.glfield.get_dtype()) .inline()( sfg.branch(f"{cache_ptr_name} == nullptr")( AugExpr.format( "{}.reset({});", cache_ptr_name, - orig.as_glfield().cloneUninitialized(), + orig.glfield.cloneUninitialized(), ) ), f"return {cache_ptr_name}.get();", @@ -521,6 +554,20 @@ class Sweep(CustomGenerator): f"Unable to map field {f} of type {f.field_type} to a waLBerla field." ) + def _make_field_info(self, f: Field, target: Target) -> FieldInfo: + match f.field_type: + case FieldType.GENERIC | FieldType.CUSTOM: + glfield = self._glfield_type.create(f) + data_id = BlockDataID().var(f"{f.name}Id") + return GlFieldInfo(f, glfield, data_id) + case FieldType.INDEXED: + assert isinstance(f.dtype, PsStructType) + idx_list = SparseIndexList.from_field(f, target=target) + view = idx_list.view_type().var(f"{f.name}_view") + return IndexListInfo(f, idx_list, view) + case _: + raise ValueError(f"Unexpected field type: {f.field_type} at field {f}") + def _render_invocation( self, sfg: SfgComposer, target: Target, khandle: SfgKernelHandle ) -> tuple[SfgCallTreeNode, set[SfgVar]]: @@ -560,18 +607,19 @@ class Sweep(CustomGenerator): ) all_fields: dict[str, FieldInfo] = { - f.name: FieldInfo( - f, self._walberla_field(f), BlockDataID().var(f"{f.name}Id") - ) - for f in khandle.fields + f.name: self._make_field_info(f, target) for f in khandle.fields } - swaps: dict[str, FieldInfo] = dict() + swaps: dict[str, GlFieldInfo] = dict() shadows_cache = ShadowFieldCache() for shadow, orig in self._shadow_fields.items(): - swaps[orig.name] = all_fields[shadow.name] - shadows_cache.add_shadow(all_fields[orig.name]) + shadow_fi = all_fields[shadow.name] + assert isinstance(shadow_fi, GlFieldInfo) + original_fi = all_fields[orig.name] + assert isinstance(original_fi, GlFieldInfo) + swaps[orig.name] = shadow_fi + shadows_cache.add_shadow(original_fi) block = IBlockPtr().var("block") block_fields = sorted( @@ -589,7 +637,7 @@ class Sweep(CustomGenerator): vector_groups = combine_vectors(parameters) for fi in block_fields: - props.add_property(fi.data_id, setter=False, getter=True) + props.add_property(fi.entity, setter=False, getter=True) for s in sorted(parameters, key=lambda p: p.name): props.add_property(s, setter=True, getter=True) @@ -597,32 +645,25 @@ class Sweep(CustomGenerator): def render_runmethod(ci: CellInterval | None = None): return [ # Get IDs from class - *(sfg.init(fi.data_id)(props.get(fi.data_id)) for fi in block_fields), - # Extract regular fields from block - *( - sfg.init(fi.wlb_field_ptr)( - block.getData(fi.wlb_field_ptr.field_type, fi.data_id) - ) - for fi in block_fields - ), + *(sfg.init(fi.entity)(props.get(fi.entity)) for fi in block_fields), + # Extract field views + *(sfg.init(fi.view)(fi.create_view(block)) for fi in block_fields), # Get shadow fields from cache *( - sfg.init(shadow_info.wlb_field_ptr)( - shadows_cache.get_shadow(orig_name) - ) + sfg.init(shadow_info.glfield)(shadows_cache.get_shadow(orig_name)) for orig_name, shadow_info in swaps.items() ), # Map GhostLayerFields to pystencils fields *( - sfg.map_field(fi.field, fi.as_glfield().with_cell_interval(ci)) + sfg.map_field(fi.field, fi.glfield.with_cell_interval(ci)) for fi in all_fields.values() - if fi.is_glfield() + if isinstance(fi, GlFieldInfo) ), - # Map index lists to pystencils fields + # Extract indexing information from field view for all remaining fields *( - sfg.map_field(fi.field, fi.as_index_list()) + sfg.map_field(fi.field, fi.view_extraction()) for fi in all_fields.values() - if fi.is_index_list() + if not isinstance(fi, GlFieldInfo) ), # Get parameters from class *(sfg.init(param)(props.get(param)) for param in parameters), diff --git a/tests/BasicLbmScenarios/SimDomain.hpp b/tests/BasicLbmScenarios/SimDomain.hpp index 228a89d..50991de 100644 --- a/tests/BasicLbmScenarios/SimDomain.hpp +++ b/tests/BasicLbmScenarios/SimDomain.hpp @@ -111,6 +111,11 @@ struct SimDomain return sweep::BorderSweep< stencil::Direction::T, gen::bc_grid_aligned::NoSlipTop > { blocks, gen::bc_grid_aligned::NoSlipTop { gpuFields.pdfsId }, -1 }; } + auto + irregularFreeSlipFactory() { + return gen::bc_sparse::FreeSlipIrregularFactory(blocks, gpuFields.pdfsId); + } + void wait() { WALBERLA_GPU_CHECK(gpuDeviceSynchronize()); } void syncGhostLayers() @@ -162,6 +167,10 @@ struct SimDomain return { cpuFields.pdfsId, cpuFields.rhoId, cpuFields.uId, force, omega }; } + auto irregularFreeSlipFactory() { + return gen::bc_sparse::FreeSlipIrregularFactory(blocks, cpuFields.pdfsId); + } + void syncGhostLayers() { commCpu(); } void wait() { /* NOP */ } diff --git a/tests/BasicLbmScenarios/TestBasicLbmScenarios.cpp b/tests/BasicLbmScenarios/TestBasicLbmScenarios.cpp index 656df20..4be1a2d 100644 --- a/tests/BasicLbmScenarios/TestBasicLbmScenarios.cpp +++ b/tests/BasicLbmScenarios/TestBasicLbmScenarios.cpp @@ -50,6 +50,7 @@ void fullyPeriodic(Environment& env) } } +#if defined(LBM_SCENARIOS_GPU_BUILD) /** * Periodic channel flow with a no-slip boundary at the top * and a mirror boundary at the bottom. @@ -134,6 +135,8 @@ void mirroredHalfChannel(Environment& env) } } +#endif + /** * Pipe flow with circular cross-section and free-slip walls. * The pipe flow is initialized with a constant velocity in x-direction. @@ -162,8 +165,7 @@ void freeSlipPipe(Environment& env) const real_t maxVelocity{ 0.02 }; const Vector3< real_t > force{ 0., 0., 0. }; - for (auto& block : *dom.blocks) - { + dom.forAllBlocks([&](IBlock & block) { FlagField_T& flagField = *block.getData< FlagField_T >(dom.cpuFields.flagFieldId); const uint8_t freeSlipFlag{ flagField.getOrRegisterFlag(freeSlipFlagUID) }; @@ -191,17 +193,18 @@ void freeSlipPipe(Environment& env) velField.get(c, 1) = initVelocity[1]; velField.get(c, 2) = initVelocity[2]; } - } + }); geometry::setNonBoundaryCellsToDomain< FlagField_T >(*dom.blocks, dom.cpuFields.flagFieldId, fluidFlagUid); auto flagsOutput = field::createVTKOutput< FlagField_T >(dom.cpuFields.flagFieldId, *dom.blocks, "flags"); flagsOutput(); + dom.fields2device(); dom.initFromFields(force); gen::bc_sparse::FreeSlipIrregular freeSlip{ - gen::bc_sparse::FreeSlipIrregularFactory(dom.blocks, dom.cpuFields.pdfsId) + dom.irregularFreeSlipFactory() .fromFlagField< FlagField_T >(dom.cpuFields.flagFieldId, freeSlipFlagUID, fluidFlagUid) }; @@ -211,27 +214,29 @@ void freeSlipPipe(Environment& env) for (uint_t i = 0; i < 10; ++i) { - for (auto& block : *dom.blocks) - { + dom.forAllBlocks([&](IBlock & block){ streamCollide(&block); + }); + dom.syncGhostLayers(); + dom.fields2host(); + + dom.forAllBlocks([&](IBlock & block){ VectorField_T& velField = *block.getData< VectorField_T >(dom.cpuFields.uId); FlagField_T& flagField = *block.getData< FlagField_T >(dom.cpuFields.flagFieldId); uint8_t fluidFlag = flagField.getFlag(fluidFlagUid); - for (Cell c : allCells) - { + dom.forAllCells([&](Cell c){ if (flagField.isFlagSet(c, fluidFlag)) { WALBERLA_CHECK_FLOAT_EQUAL(velField.get(c, 0), maxVelocity); WALBERLA_CHECK_FLOAT_EQUAL(velField.get(c, 1), 0.); WALBERLA_CHECK_FLOAT_EQUAL(velField.get(c, 2), 0.); } - } - } + }); + }); velOutput(); - dom.syncGhostLayers(); for (auto& block : *dom.blocks) { @@ -247,8 +252,6 @@ int main(int argc, char** argv) { walberla::Environment env{ argc, argv }; BasicLbmScenarios::fullyPeriodic(env); - BasicLbmScenarios::mirroredHalfChannel(env); -#if !defined(LBM_SCENARIOS_GPU_BUILD) + // BasicLbmScenarios::mirroredHalfChannel(env); BasicLbmScenarios::freeSlipPipe(env); -#endif } -- GitLab