From 2aa8cafb19a37f9e8b4b2f7d223eb872969c0042 Mon Sep 17 00:00:00 2001 From: Frederik Hennig <frederik.hennig@fau.de> Date: Tue, 8 Apr 2025 17:19:02 +0200 Subject: [PATCH] Introduce generator for GPU PDF-Field Pack Infos. Squashed commit of the following: commit fd5ccd1442a27fdb209f38dfaa7e2e36e770d8cf Author: Frederik Hennig <frederik.hennig@fau.de> Date: Tue Apr 8 17:14:38 2025 +0200 attempt fix ci commit 2c3b8677008cf964fe2b3fb64b1ca14d84f7070c Author: Frederik Hennig <frederik.hennig@fau.de> Date: Tue Apr 8 17:00:14 2025 +0200 attempt to fix, 2 commit 30e3dbc6cca2b02bb2f87e4ba438ac5e0d8635fa Author: Frederik Hennig <frederik.hennig@fau.de> Date: Tue Apr 8 16:57:31 2025 +0200 attempt to fix mpiexec in CI commit 8daef09dfd1e39b462a7c682ff29f68ac338df62 Author: Frederik Hennig <frederik.hennig@fau.de> Date: Tue Apr 8 16:20:58 2025 +0200 output test logs on failure in CI commit 2eb890ae6d5ca30a4b9374eebe7d005be3890ce1 Author: Frederik Hennig <frederik.hennig@fau.de> Date: Tue Apr 8 15:16:34 2025 +0200 fix and test gpu packinfo packet size and direction sets. 
commit 4800dcb0f80da8fe315b943c6f447a6e34d9f16a Author: Frederik Hennig <frederik.hennig@fau.de> Date: Tue Apr 8 12:40:18 2025 +0200 fix compiler error commit a171e6f4fcd9b2dd1dc1566565b149d6edbb6f7d Merge: de494f9 7effd7c Author: Frederik Hennig <frederik.hennig@fau.de> Date: Tue Apr 8 12:11:51 2025 +0200 Merge branch 'master' into fhennig/gpu-packinfo commit de494f9be033fff98f582c7c56d213af92f9b9c7 Author: Frederik Hennig <frederik.hennig@fau.de> Date: Mon Apr 7 17:17:05 2025 +0200 add device memtag commit 683531548402af740852cc7e90bd2c20cf1be3b5 Author: Frederik Hennig <frederik.hennig@fau.de> Date: Mon Apr 7 13:19:17 2025 +0200 update gpu pdf field pack info: restrict to streaming PDFs commit 9be1458f80f82c89c6c3e8214713f71e075163bf Author: Frederik Hennig <frederik.hennig@fau.de> Date: Thu Apr 3 09:52:30 2025 +0200 fix GPU comm scheme commit ad769fc122b426dfcfde64f8918617860725bf08 Author: Frederik Hennig <frederik.hennig@fau.de> Date: Wed Apr 2 22:32:16 2025 +0200 finished generated packinfo implementation commit 68b60f662ceea3481760dab5abb767067e5c7662 Author: Frederik Hennig <frederik.hennig@fau.de> Date: Wed Apr 2 16:57:11 2025 +0200 pack info codegen WIP commit 2998cdf7114d1b3c504af1d39bb5652836280db4 Author: Frederik Hennig <frederik.hennig@fau.de> Date: Wed Apr 2 15:25:07 2025 +0200 gpu pack info base class and impl concept --- .gitlab-ci.yml | 4 +- lib/CMakeLists.txt | 1 + .../UniformGpuFieldPackInfoBase.hpp | 107 +++++++ .../experimental/memory/MemoryTags.hpp | 7 + src/walberla/codegen/api.py | 24 +- .../codegen/communication/__init__.py | 3 + .../codegen/communication/pack_infos.py | 289 ++++++++++++++++++ tests/BasicLbmScenarios/CMakeLists.txt | 8 +- tests/BasicLbmScenarios/LbmAlgorithms.py | 14 +- tests/BasicLbmScenarios/PackInfo.py | 12 + tests/BasicLbmScenarios/SimDomain.hpp | 18 +- .../TestBasicLbmScenarios.cpp | 21 +- tests/CMakeLists.txt | 5 + 13 files changed, 480 insertions(+), 33 deletions(-) create mode 100644 
lib/walberla/experimental/communication/UniformGpuFieldPackInfoBase.hpp create mode 100644 src/walberla/codegen/communication/__init__.py create mode 100644 src/walberla/codegen/communication/pack_infos.py create mode 100644 tests/BasicLbmScenarios/PackInfo.py diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index bdaaf9e..37e7a77 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -35,8 +35,10 @@ typecheck: - cd build/${cmakePresetName} - cmake --build . --target SfgTests - cmake --build . --target UserManualExamples + variables: + TESTSUITE_MPIEXEC_FLAGS: --oversubscribe;--allow-run-as-root script: - - ctest + - ctest --output-on-failure .clang-19: image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-19:latest diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 4a50799..71cbd39 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -7,6 +7,7 @@ target_sources( walberla_experimental walberla/experimental/sweep/SparseIndexList.hpp walberla/experimental/lbm/GenericHbbBoundary.hpp walberla/experimental/lbm/IrregularFreeSlip.hpp + walberla/experimental/communication/UniformGpuFieldPackInfoBase.hpp ) target_link_libraries( diff --git a/lib/walberla/experimental/communication/UniformGpuFieldPackInfoBase.hpp b/lib/walberla/experimental/communication/UniformGpuFieldPackInfoBase.hpp new file mode 100644 index 0000000..acff9cb --- /dev/null +++ b/lib/walberla/experimental/communication/UniformGpuFieldPackInfoBase.hpp @@ -0,0 +1,107 @@ +#pragma once + +#if defined(WALBERLA_BUILD_WITH_CUDA) || defined(WALBERLA_BUILD_WITH_HIP) + +#include "core/all.h" + +#include "gpu/communication/GeneratedGPUPackInfo.h" + +#include <concepts> +#include <span> + +namespace walberla::experimental::communication +{ + +namespace detail +{ +template< typename T > +concept UniformGpuFieldPackInfoImpl = requires(T impl, // + typename T::Field_T& field, // + std::span< typename T::Field_T::value_type > buffer, // + stencil::Direction dir, // + CellInterval& ci, // + gpuStream_t stream // +) 
{ + typename T::Field_T; + + { impl.doPack(field, buffer, dir, ci, stream) } -> std::same_as< void >; + + { impl.doUnpack(field, buffer, dir, ci, stream) } -> std::same_as< void >; + + { impl.doLocalCopy(field, ci, field, ci, dir, stream) } -> std::same_as< void >; + + { impl.elementsPerCell(dir) } -> std::same_as< uint_t >; +}; +} // namespace detail + +template< typename Impl > +class UniformGpuFieldPackInfoBase : public gpu::GeneratedGPUPackInfo +{ + public: + // static_assert( detail::UniformGpuFieldPackInfoImpl< Impl >, "Impl does not satisfy constraints."); + + UniformGpuFieldPackInfoBase(BlockDataID fieldId, uint_t sliceWidth = 1) + : fieldId_{ fieldId }, sliceWidth_{ sliceWidth } + {} + + void pack(stencil::Direction dir, unsigned char* rawBuffer, IBlock* block, gpuStream_t stream) override + { + using Field_T = typename Impl::Field_T; + using value_type = typename Field_T::value_type; + Field_T * field = block->getData< Field_T >(fieldId_); + CellInterval ci; + field->getSliceBeforeGhostLayer(dir, ci, sliceWidth_, false); + std::span< value_type > buffer{ reinterpret_cast< value_type* >(rawBuffer), this->size(dir, block) / sizeof(value_type) }; // size() is in bytes, the span extent is in elements + impl().doPack(field, buffer, dir, ci, stream); + } + + void unpack(stencil::Direction dir, unsigned char* rawBuffer, IBlock* block, gpuStream_t stream) override + { + using Field_T = typename Impl::Field_T; + using value_type = typename Field_T::value_type; + Field_T * field = block->getData< Field_T >(fieldId_); + CellInterval ci; + field->getGhostRegion(dir, ci, sliceWidth_, false); + std::span< value_type > buffer{ reinterpret_cast< value_type* >(rawBuffer), this->size(dir, block) / sizeof(value_type) }; // size() is in bytes, the span extent is in elements + stencil::Direction commDir{ stencil::inverseDir[ dir ] }; + impl().doUnpack(field, buffer, commDir, ci, stream); + } + + void communicateLocal ( stencil::Direction dir, const IBlock *sender, IBlock *receiver, gpuStream_t stream ) override { + using Field_T = typename Impl::Field_T; + + Field_T * srcField = const_cast< IBlock * >(sender)->getData< Field_T >(fieldId_); + Field_T * 
dstField = receiver->getData< Field_T >(fieldId_); + + CellInterval srcRegion; + CellInterval dstRegion; + srcField->getSliceBeforeGhostLayer(dir, srcRegion, sliceWidth_, false); + dstField->getGhostRegion(stencil::inverseDir[dir], dstRegion, sliceWidth_, false); + + impl().doLocalCopy(srcField, srcRegion, dstField, dstRegion, dir, stream); + } + + uint_t size(stencil::Direction dir, IBlock* block) override + { + using Field_T = typename Impl::Field_T; + using value_type = typename Field_T::value_type; + + const Field_T * field = block->getData< Field_T >(fieldId_); + CellInterval ci; + field->getGhostRegion(dir, ci, sliceWidth_, false); + + uint_t elementsPerCell{ impl().elementsPerCell(dir) }; + return elementsPerCell * ci.numCells() * sizeof( value_type ); + } + + protected: + BlockDataID fieldId_; + uint_t sliceWidth_; + + private: + Impl& impl() { return static_cast< Impl& >(*this); } +}; + +} // namespace walberla::experimental::communication + +#endif diff --git a/lib/walberla/experimental/memory/MemoryTags.hpp b/lib/walberla/experimental/memory/MemoryTags.hpp index 902d739..0a26a34 100644 --- a/lib/walberla/experimental/memory/MemoryTags.hpp +++ b/lib/walberla/experimental/memory/MemoryTags.hpp @@ -38,6 +38,13 @@ struct unified : public _mem_tag {}; inline unified unified_v; +/** + * @brief Memory tag indicating GPU device memory. 
+ */ +struct device : public _mem_tag +{}; +inline device device_v; + } // namespace memtag template< typename T > diff --git a/src/walberla/codegen/api.py b/src/walberla/codegen/api.py index 766c6fc..b5fa9d7 100644 --- a/src/walberla/codegen/api.py +++ b/src/walberla/codegen/api.py @@ -113,6 +113,11 @@ class CellInterval(_PlainCppClass): class Direction(_PlainCppClass): _type = cpptype("walberla::stencil::Direction", "stencil/Directions.h") + @staticmethod + def from_offset(offset: tuple[int, int, int]) -> str: + from pystencils.stencil import offset_to_direction_string + return f"walberla::stencil::Direction::{offset_to_direction_string(offset)}" + class BlockDataID(_PlainCppClass): _type = cpptype("walberla::BlockDataID", "domain_decomposition/BlockDataID.h") @@ -264,7 +269,7 @@ class GhostLayerFieldPtr(GenericWalberlaField): ) @staticmethod - def create(field: Field): + def create(field: Field, const: bool = False): if field.index_dimensions > 1: raise ValueError( "Cannot map fields with more than one index dimension to field::GhostLayerField." 
@@ -279,15 +284,11 @@ class GhostLayerFieldPtr(GenericWalberlaField): fsize = field.index_shape[0] if field.index_shape else 1 - return GhostLayerFieldPtr(element_type, fsize).var(field.name) + return GhostLayerFieldPtr(element_type, fsize, const=const).var(field.name) - def __init__( - self, - element_type: UserTypeSpec, - fsize: int, - ): + def __init__(self, element_type: UserTypeSpec, fsize: int, const: bool = False): element_type = create_type(element_type) - field_type = self._template(element_type=element_type, fsize=fsize) + field_type = self._template(element_type=element_type, fsize=fsize, const=const) super().__init__(element_type, field_type, ptr=True) @@ -299,7 +300,7 @@ class GpuFieldPtr(GenericWalberlaField): ) @staticmethod - def create(field: Field): + def create(field: Field, const: bool = False): if field.index_dimensions > 1: raise ValueError( "Cannot map fields with more than one index dimension to gpu::GpuField." @@ -314,15 +315,16 @@ class GpuFieldPtr(GenericWalberlaField): fsize = field.index_shape[0] if field.index_shape else 1 - return GpuFieldPtr(element_type, fsize).var(field.name) + return GpuFieldPtr(element_type, fsize, const=const).var(field.name) def __init__( self, element_type: UserTypeSpec, fsize: int, + const: bool = False, ): element_type = create_type(element_type) - field_type = self._template(element_type=element_type) + field_type = self._template(element_type=element_type, const=const) super().__init__(element_type, field_type, ptr=True) diff --git a/src/walberla/codegen/communication/__init__.py b/src/walberla/codegen/communication/__init__.py new file mode 100644 index 0000000..9a51492 --- /dev/null +++ b/src/walberla/codegen/communication/__init__.py @@ -0,0 +1,3 @@ +from .pack_infos import GpuPdfFieldPackInfo + +__all__ = ["GpuPdfFieldPackInfo"] diff --git a/src/walberla/codegen/communication/pack_infos.py b/src/walberla/codegen/communication/pack_infos.py new file mode 100644 index 0000000..a3284f3 --- /dev/null +++ 
b/src/walberla/codegen/communication/pack_infos.py @@ -0,0 +1,289 @@ +from __future__ import annotations + +from dataclasses import dataclass + +from pystencils import ( + Field, + FieldType, + Assignment, + CreateKernelConfig, + DynamicType, + Target, +) +from pystencils.stencil import offset_to_direction_string +from pystencils.codegen.properties import FieldBasePtr +from lbmpy import Stencil, LBStencil + +from pystencilssfg import SfgComposer +from pystencilssfg.composer.basic_composer import KernelsAdder +from pystencilssfg.composer.custom import CustomGenerator +from pystencilssfg.ir import SfgKernelHandle, SfgEmptyNode +from pystencilssfg.ir.call_tree import SfgCallTreeNode +from pystencilssfg.ir.postprocessing import PostProcessingContext, SfgDeferredNode +from pystencilssfg.lang import SfgKernelParamVar, AugExpr, strip_ptr_ref +from pystencilssfg.lang.cpp import std +from pystencilssfg.lang.gpu import CudaAPI, HipAPI, ProvidesGpuRuntimeAPI + +from ..api import GpuFieldPtr, Direction, CellInterval, uint_t +from ..build_config import get_build_config + + +@dataclass +class PackingKernelsContext: + sfg: SfgComposer + kns: KernelsAdder + cfg: CreateKernelConfig + + +@dataclass +class CaptureBufferPointer(SfgDeferredNode): + sfg: SfgComposer + buffer_name: str + buffer_span: std.span + + def expand(self, ppc: PostProcessingContext) -> SfgCallTreeNode: + for param in ppc.live_variables: + if ( + isinstance(param, SfgKernelParamVar) + and param.wrapped.fields[0].name == self.buffer_name + and param.wrapped.get_properties(FieldBasePtr) + ): + return self.sfg.init(param)( + AugExpr.format("{}.data()", self.buffer_span) + ) + + return SfgEmptyNode() + + +class GpuPdfFieldPackInfo(CustomGenerator): + """Pack Info for lattice Boltzmann Gpu PDF fields. + + Generate a ghost layer exchange pack info for communicating lattice Boltzmann populations + streaming across a block boundary, + for use with `gpu::GpuField` and `gpu::UniformGpuScheme`. 
+ + For a given velocity set, this pack info will only communicate those populations f_i + from block A to a neighbor block B which are being advected, by the streaming step, + from a cell in A to an adjacent cell in B. + + .. note:: + For the time being, this pack info is restricted to the *pull* streaming pattern. + + Args: + name: Name of the generated pack info class + stencil: Velocity set of the lattice Boltzmann method + field: Symbolic representation of the PDF field + """ + + def __init__(self, name: str, stencil: LBStencil, field: Field): + if field.index_dimensions > 1: + raise ValueError( + "GpuFieldPackInfo currently does not support higher-order tensor fields" + ) + + if isinstance(field.dtype, DynamicType): + raise ValueError( + "Cannot generate GpuFieldPackInfo for a dynamically-typed field" + ) + + self._name = name + self._stencil = stencil + self._full_stencil = ( + LBStencil(Stencil.D3Q27) + if self._stencil.D == 3 + else LBStencil(Stencil.D2Q9) + ) + + # Map storing the set of communicated populations for each communication direction + self._communication_sets: dict[tuple[int, int, int], list[int]] = dict() + for comm_dir in self._full_stencil: + if indices := self._get_streaming_indices(comm_dir): + self._communication_sets[comm_dir] = indices + + self._field = field + self._dtype = field.dtype + self._src_field = Field.new_field_with_different_name( + self._field, f"{self._field.name}_src" + ) + self._dst_field = Field.new_field_with_different_name( + self._field, f"{self._field.name}_dst" + ) + + def generate(self, sfg: SfgComposer) -> None: + base_class = f"walberla::experimental::communication::UniformGpuFieldPackInfoBase< {self._name } >" + sfg.include( + "walberla/experimental/communication/UniformGpuFieldPackInfoBase.hpp" + ) + + build_config = get_build_config(sfg) + + pkc = PackingKernelsContext( + sfg, + kns=sfg.kernel_namespace(f"{self._name}_kernels"), + cfg=build_config.get_pystencils_config(), + ) + + GpuAPI: 
type[ProvidesGpuRuntimeAPI] + match pkc.cfg.get_target(): + case Target.CUDA: + GpuAPI = CudaAPI + case Target.HIP: + GpuAPI = HipAPI + case other: + raise ValueError( + f"Invalid target for generating GpuFieldPackInfo: {other}" + ) + + pack_kernels: dict[tuple[int, int, int], SfgKernelHandle] = dict() + unpack_kernels: dict[tuple[int, int, int], SfgKernelHandle] = dict() + local_copy_kernels: dict[tuple[int, int, int], SfgKernelHandle] = dict() + + comm_dirs = self._communication_sets.keys() + + for comm_dir in comm_dirs: + if not all(c == 0 for c in comm_dir): + pack_kernels[comm_dir] = self._do_pack(pkc, comm_dir) + unpack_kernels[comm_dir] = self._do_unpack(pkc, comm_dir) + local_copy_kernels[comm_dir] = self._do_local_copy(pkc, comm_dir) + + src_gpu_field = GpuFieldPtr.create(self._src_field) + gpu_field_type = strip_ptr_ref(src_gpu_field.get_dtype()) + dst_gpu_field = GpuFieldPtr.create(self._dst_field) + buffer_span = std.span(self._dtype).var("buffer") + dir = Direction().var("dir") + src_interval = CellInterval(const=True, ref=True).var("srcInterval") + dst_interval = CellInterval(const=True, ref=True).var("dstInterval") + + stream = GpuAPI.stream_t().var("stream") + + sfg.klass(self._name, bases=[f"public {base_class}"])( + sfg.public( + f"using Base = {base_class};", + "using Base::Base;", + f"using Field_T = {gpu_field_type.c_string()};", + sfg.method("doPack").params( + src_gpu_field, buffer_span, dir, src_interval, stream + )( + sfg.map_field( + self._src_field, src_gpu_field.with_cell_interval(src_interval) + ), + CaptureBufferPointer(sfg, "buffer", buffer_span), + sfg.switch(dir) + .cases( + { + Direction.from_offset(comm_dir): sfg.gpu_invoke( + pack_kernels[comm_dir], stream=stream + ) + for comm_dir in comm_dirs + } + ) + .default("/* unreachable */"), + ), + sfg.method("doUnpack").params( + src_gpu_field, buffer_span, dir, dst_interval, stream + )( + sfg.map_field( + self._dst_field, src_gpu_field.with_cell_interval(dst_interval) + ), + 
CaptureBufferPointer(sfg, "buffer", buffer_span), + sfg.switch(dir) + .cases( + { + Direction.from_offset(comm_dir): sfg.gpu_invoke( + unpack_kernels[comm_dir], stream=stream + ) + for comm_dir in comm_dirs + } + ) + .default("/* unreachable */"), + ), + sfg.method("doLocalCopy").params( + src_gpu_field, + src_interval, + dst_gpu_field, + dst_interval, + dir, + stream, + )( + sfg.map_field( + self._src_field, src_gpu_field.with_cell_interval(src_interval) + ), + sfg.map_field( + self._dst_field, dst_gpu_field.with_cell_interval(dst_interval) + ), + sfg.switch(dir) + .cases( + { + f"walberla::stencil::Direction::{offset_to_direction_string(comm_dir)}": sfg.gpu_invoke( + local_copy_kernels[comm_dir], stream=stream + ) + for comm_dir in comm_dirs + } + ) + .default("/* unreachable */"), + ), + sfg.method("elementsPerCell") + .inline() + .const() + .params(dir) + .returns(uint_t)( + sfg.switch(dir, autobreak=False).cases({ + Direction.from_offset(comm_dir): f"return {len(elems)};" + for comm_dir, elems in self._communication_sets.items() + }).default("return 0;") + ), + ) + ) + + def _pack_accesses(self, comm_dir: tuple[int, int, int]) -> list[Field.Access]: + return [self._src_field.center(i) for i in self._communication_sets[comm_dir]] + + def _unpack_accesses(self, comm_dir: tuple[int, int, int]) -> list[Field.Access]: + return [self._dst_field.center(i) for i in self._communication_sets[comm_dir]] + + def _get_streaming_indices(self, comm_dir) -> list[int]: + if all(d == 0 for d in comm_dir): + return [] + else: + from lbmpy.advanced_streaming.communication import _extend_dir + + directions = set(_extend_dir(comm_dir)) & set(self._stencil) + indices = sorted(self._stencil.index(d) for d in directions) + return indices + + def _do_pack( + self, pkc: PackingKernelsContext, comm_dir: tuple[int, int, int] + ) -> SfgKernelHandle: + pack_accs = self._pack_accesses(comm_dir) + buffer = self._buffer(len(pack_accs)) + asms = [Assignment(buffer(i), acc) for i, acc in 
enumerate(pack_accs)] + dir_str = offset_to_direction_string(comm_dir) + return pkc.kns.create(asms, f"pack{dir_str}", pkc.cfg) + + def _do_unpack( + self, pkc: PackingKernelsContext, comm_dir: tuple[int, int, int] + ) -> SfgKernelHandle: + unpack_accs = self._unpack_accesses(comm_dir) + buffer = self._buffer(len(unpack_accs)) + asms = [Assignment(acc, buffer(i)) for i, acc in enumerate(unpack_accs)] + dir_str = offset_to_direction_string(comm_dir) + return pkc.kns.create(asms, f"unpack{dir_str}", pkc.cfg) + + def _do_local_copy( + self, pkc: PackingKernelsContext, comm_dir: tuple[int, int, int] + ) -> SfgKernelHandle: + pack_accs = self._pack_accesses(comm_dir) + unpack_accs = self._unpack_accesses(comm_dir) + + asms = [Assignment(dst, src) for dst, src in zip(unpack_accs, pack_accs)] + dir_str = offset_to_direction_string(comm_dir) + return pkc.kns.create(asms, f"localCopy{dir_str}", pkc.cfg) + + def _buffer(self, num_elems: int): + return Field.create_generic( + "buffer", + 1, + field_type=FieldType.BUFFER, + dtype=self._field.dtype, + index_shape=(num_elems,), + ) diff --git a/tests/BasicLbmScenarios/CMakeLists.txt b/tests/BasicLbmScenarios/CMakeLists.txt index 58dfde5..c128ff8 100644 --- a/tests/BasicLbmScenarios/CMakeLists.txt +++ b/tests/BasicLbmScenarios/CMakeLists.txt @@ -11,7 +11,9 @@ target_link_libraries( TestBasicLbmScenariosCPU PRIVATE walberla::core walberla: add_dependencies( SfgTests TestBasicLbmScenariosCPU ) foreach( TestID ${TestIDs} ) - add_test( NAME "BasicLbmScenarios - CPU - ${TestID}" COMMAND TestBasicLbmScenariosCPU ${TestID} ) +foreach( NumProcs 1 2 4 ) + add_test( NAME "BasicLbmScenarios - CPU - ${NumProcs} Processes - ${TestID}" COMMAND mpiexec -c ${NumProcs} ${_SFG_TESTSUITE_MPIEXEC_FLAGS} TestBasicLbmScenariosCPU ${TestID} ) +endforeach() endforeach() @@ -39,6 +41,8 @@ if( $CACHE{WALBERLA_BUILD_WITH_HIP} ) add_dependencies( SfgTests TestBasicLbmScenariosHIP ) foreach( TestID ${TestIDs} ) - add_test( NAME "BasicLbmScenarios - HIP - 
${TestID}" COMMAND TestBasicLbmScenariosHIP ${TestID} ) + foreach( NumProcs 1 2 4 ) + add_test( NAME "BasicLbmScenarios - HIP - ${NumProcs} Processes - ${TestID}" COMMAND mpiexec -c ${NumProcs} ${_SFG_TESTSUITE_MPIEXEC_FLAGS} TestBasicLbmScenariosHIP ${TestID} ) + endforeach() endforeach() endif() diff --git a/tests/BasicLbmScenarios/LbmAlgorithms.py b/tests/BasicLbmScenarios/LbmAlgorithms.py index eefe063..c3faa28 100644 --- a/tests/BasicLbmScenarios/LbmAlgorithms.py +++ b/tests/BasicLbmScenarios/LbmAlgorithms.py @@ -16,6 +16,7 @@ from lbmpy.macroscopic_value_kernels import macroscopic_values_setter from walberla.codegen import Sweep, get_build_config from walberla.codegen.boundaries import NoSlip, FreeSlip +from walberla.codegen.communication import GpuPdfFieldPackInfo from walberla.codegen.build_config import DEBUG_MOCK_CMAKE @@ -31,17 +32,19 @@ with SourceFileGenerator(keep_unknown_argv=True) as sfg: match args.target: case "cpu": - build_config.override.target = Target.CurrentCPU + target = Target.CurrentCPU sfg.code("#define LBM_SCENARIOS_CPU_BUILD true") case "hip": - build_config.override.target = Target.HIP + target = Target.HIP sfg.code("#define LBM_SCENARIOS_GPU_BUILD true") case "cuda": - build_config.override.target = Target.CUDA + target = Target.CUDA sfg.code("#define LBM_SCENARIOS_GPU_BUILD true") case _: raise ValueError(f"Unexpected target id: {args.target}") + build_config.override.target = target + sfg.namespace("BasicLbmScenarios::gen") stencil = LBStencil(Stencil.D3Q19) @@ -117,3 +120,8 @@ with SourceFileGenerator(keep_unknown_argv=True) as sfg: wall_orientation=FreeSlip.IRREGULAR, ) sfg.generate(irreg_freeslip) + + if build_config.override.target.is_gpu(): + with sfg.namespace("comm"): + pack_info = GpuPdfFieldPackInfo("GpuPdfsPackInfo", stencil, f) + sfg.generate(pack_info) diff --git a/tests/BasicLbmScenarios/PackInfo.py b/tests/BasicLbmScenarios/PackInfo.py new file mode 100644 index 0000000..df3ccdd --- /dev/null +++ 
b/tests/BasicLbmScenarios/PackInfo.py @@ -0,0 +1,12 @@ +import pystencils as ps +from lbmpy import Stencil, LBStencil +from pystencilssfg import SourceFileGenerator +from walberla.codegen.communication import GpuPdfFieldPackInfo +from walberla.codegen.build_config import DEBUG_MOCK_CMAKE + +DEBUG_MOCK_CMAKE.use_hip_default() + +with SourceFileGenerator() as sfg: + stencil = LBStencil(Stencil.D3Q19) + field = ps.fields(f"f({stencil.Q}): double[{stencil.D}D]") + sfg.generate(GpuPdfFieldPackInfo("PackInfo", stencil, field)) diff --git a/tests/BasicLbmScenarios/SimDomain.hpp b/tests/BasicLbmScenarios/SimDomain.hpp index 4417699..105e5d7 100644 --- a/tests/BasicLbmScenarios/SimDomain.hpp +++ b/tests/BasicLbmScenarios/SimDomain.hpp @@ -45,7 +45,7 @@ using CommonGpuField = gpu::GPUField< PdfField_T::value_type >; using GpuCommScheme = gpu::communication::UniformGPUScheme< gen::LbStencil >; // using GpuPdfsPackInfo = gpu::communication::MemcpyPackInfo< CommonGpuField >; -using GpuPdfsPackInfo = gpu::communication::GPUPackInfo< CommonGpuField >; +using GpuPdfsPackInfo = gen::comm::GpuPdfsPackInfo; #endif struct SimDomain @@ -70,7 +70,7 @@ struct SimDomain const BlockDataID uId; } gpuFields; - // GpuCommScheme commGpu; + std::unique_ptr< GpuCommScheme > commGpu; void initFromFields(const Vector3< real_t > force) { @@ -121,7 +121,7 @@ struct SimDomain void syncGhostLayers() { // WALBERLA_GPU_CHECK(gpuPeekAtLastError()); - commCpu(); + (*commGpu)(); } void fields2host() @@ -242,15 +242,13 @@ struct SimDomainBuilder const BlockDataID rhoIdGpu = gpu::addGPUFieldToStorage< ScalarField_T >(sbfs, rhoId, "rho_gpu"); const BlockDataID uIdGpu = gpu::addGPUFieldToStorage< VectorField_T >(sbfs, uId, "u_gpu"); - // GpuCommScheme commGpu{ sbfs }; - // auto gpuPdfsPackInfo = std::make_shared< GpuPdfsPackInfo >(pdfsIdGpu); - // commGpu.addPackInfo(gpuPdfsPackInfo); + auto commGpu = std::make_unique< GpuCommScheme >( sbfs ); auto gpuPdfsPackInfo = std::make_shared< GpuPdfsPackInfo 
>(pdfsIdGpu); - commCpu.addPackInfo(gpuPdfsPackInfo); + commGpu->addPackInfo(gpuPdfsPackInfo); + // commCpu.addPackInfo(gpuPdfsPackInfo); #endif - return - { + return { .blocks = sbfs, // .cpuFields = { // .pdfsId = pdfsId, @@ -265,7 +263,7 @@ struct SimDomainBuilder .rhoId = rhoIdGpu, .uId = uIdGpu }, - // .commGpu = commGpu + .commGpu = std::move(commGpu) #endif }; } diff --git a/tests/BasicLbmScenarios/TestBasicLbmScenarios.cpp b/tests/BasicLbmScenarios/TestBasicLbmScenarios.cpp index 974e5f2..e25e33a 100644 --- a/tests/BasicLbmScenarios/TestBasicLbmScenarios.cpp +++ b/tests/BasicLbmScenarios/TestBasicLbmScenarios.cpp @@ -25,8 +25,12 @@ using TestFunction = std::function< void(mpi::Environment&) >; */ void fullyPeriodic(mpi::Environment& env) { - SimDomain dom{ SimDomainBuilder{ - .blocks = { 1, 1, 1 }, .cellsPerBlock = { 32, 32, 32 }, .periodic = { true, true, true } } + uint_t numProcesses = uint_c(MPIManager::instance()->numProcesses()); + Vector3< uint_t > numBlocks{ math::getFactors3D(numProcesses) }; + + SimDomain dom{ SimDomainBuilder{ .blocks = { numBlocks[0], numBlocks[1], numBlocks[2] }, + .cellsPerBlock = { 16, 16, 16 }, + .periodic = { true, true, true } } .build() }; const Vector3< real_t > force{ 0.005, 0., 0. 
}; @@ -63,9 +67,12 @@ void fullyPeriodic(mpi::Environment& env) void mirroredHalfChannel(mpi::Environment& env) { size_t zCells{ 64 }; + uint_t numProcesses = uint_c(MPIManager::instance()->numProcesses()); + std::vector< uint_t > numBlocksXY{ math::getFactors(numProcesses, 2u) }; - SimDomain dom{ SimDomainBuilder{ - .blocks = { 1, 1, 1 }, .cellsPerBlock = { 4, 4, zCells }, .periodic = { true, true, false } } + SimDomain dom{ SimDomainBuilder{ .blocks = { numBlocksXY[0], numBlocksXY[1], 1 }, + .cellsPerBlock = { 4, 4, zCells }, + .periodic = { true, true, false } } .build() }; /* Hagen-Poiseuille-law in lattice units */ @@ -144,8 +151,10 @@ void mirroredHalfChannel(mpi::Environment& env) */ void freeSlipPipe(mpi::Environment& env) { + uint_t numProcesses = uint_c(MPIManager::instance()->numProcesses()); + SimDomain dom{ SimDomainBuilder{ - .blocks = { 1, 1, 1 }, .cellsPerBlock = { 4, 32, 32 }, .periodic = { true, false, false } } + .blocks = { numProcesses, 1, 1 }, .cellsPerBlock = { 4, 32, 32 }, .periodic = { true, false, false } } .build() }; const FlagUID fluidFlagUid{ "Fluid" }; @@ -265,7 +274,7 @@ int main(int argc, char** argv) if (auto entry = BasicLbmScenarios::TESTS.find(testId); entry != BasicLbmScenarios::TESTS.end()) { - std::get< BasicLbmScenarios::TestFunction >(*entry)(env); + std::get< BasicLbmScenarios::TestFunction > (*entry)(env); return EXIT_SUCCESS; } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e27ee17..2738c29 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -17,6 +17,11 @@ set(WALBERLA_BUILD_TUTORIALS OFF CACHE BOOL "") set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED) +set( + _SFG_TESTSUITE_MPIEXEC_FLAGS + $ENV{TESTSUITE_MPIEXEC_FLAGS} +) + include(FetchContent) FetchContent_Declare( -- GitLab