diff --git a/lib/walberla/experimental/communication/UniformGpuFieldPackInfoBase.hpp b/lib/walberla/experimental/communication/UniformGpuFieldPackInfoBase.hpp index bdf4b46e71ac66b15fbe211d1db74d3cc2a42ad5..acff9cb81f59786a699a624a8f2bbe2e56da5eb0 100644 --- a/lib/walberla/experimental/communication/UniformGpuFieldPackInfoBase.hpp +++ b/lib/walberla/experimental/communication/UniformGpuFieldPackInfoBase.hpp @@ -63,7 +63,8 @@ class UniformGpuFieldPackInfoBase : public gpu::GeneratedGPUPackInfo CellInterval ci; field->getGhostRegion(dir, ci, sliceWidth_, false); std::span< value_type > buffer{ (value_type*) rawBuffer, this->size(dir, block) }; - impl().doUnpack(field, buffer, dir, ci, stream); + stencil::Direction commDir{ stencil::inverseDir[ dir ] }; + impl().doUnpack(field, buffer, commDir, ci, stream); } void communicateLocal ( stencil::Direction dir, const IBlock *sender, IBlock *receiver, gpuStream_t stream ) override { @@ -83,13 +84,14 @@ class UniformGpuFieldPackInfoBase : public gpu::GeneratedGPUPackInfo uint_t size(stencil::Direction dir, IBlock* block) override { using Field_T = typename Impl::Field_T; + using value_type = typename Field_T::value_type; const Field_T * field = block->getData< Field_T >(fieldId_); CellInterval ci; - field->getGhostRegion(dir, ci, 1, false); + field->getGhostRegion(dir, ci, sliceWidth_, false); uint_t elementsPerCell{ impl().elementsPerCell(dir) }; - return elementsPerCell * ci.numCells(); + return elementsPerCell * ci.numCells() * sizeof( value_type ); } protected: diff --git a/src/walberla/codegen/api.py b/src/walberla/codegen/api.py index 284b31766546141768791ca7d544ba29151d4238..b5fa9d78009062165fcf7bd48b246be02ec37680 100644 --- a/src/walberla/codegen/api.py +++ b/src/walberla/codegen/api.py @@ -113,6 +113,11 @@ class CellInterval(_PlainCppClass): class Direction(_PlainCppClass): _type = cpptype("walberla::stencil::Direction", "stencil/Directions.h") + @staticmethod + def from_offset(offset: tuple[int, int, int]) -> str: + from pystencils.stencil import offset_to_direction_string + return f"walberla::stencil::Direction::{offset_to_direction_string(offset)}" + class BlockDataID(_PlainCppClass): _type = cpptype("walberla::BlockDataID", "domain_decomposition/BlockDataID.h") diff --git a/src/walberla/codegen/communication/pack_infos.py b/src/walberla/codegen/communication/pack_infos.py index f262526eef3e13296ec948b6614f990ac8bbd2d6..a3284f386328c1950226dba46f5793ac2f87fbe6 100644 --- a/src/walberla/codegen/communication/pack_infos.py +++ b/src/walberla/codegen/communication/pack_infos.py @@ -22,7 +22,7 @@ from pystencilssfg.ir.call_tree import SfgCallTreeNode from pystencilssfg.ir.postprocessing import PostProcessingContext, SfgDeferredNode from pystencilssfg.lang import SfgKernelParamVar, AugExpr, strip_ptr_ref from pystencilssfg.lang.cpp import std -from pystencilssfg.lang.gpu import CudaAPI, HipAPI +from pystencilssfg.lang.gpu import CudaAPI, HipAPI, ProvidesGpuRuntimeAPI from ..api import GpuFieldPtr, Direction, CellInterval, uint_t from ..build_config import get_build_config @@ -123,7 +123,7 @@ class GpuPdfFieldPackInfo(CustomGenerator): cfg=build_config.get_pystencils_config(), ) - # GpuAPI: type[ProvidesGpuRuntimeAPI] + GpuAPI: type[ProvidesGpuRuntimeAPI] match pkc.cfg.get_target(): case Target.CUDA: GpuAPI = CudaAPI @@ -171,7 +171,7 @@ class GpuPdfFieldPackInfo(CustomGenerator): sfg.switch(dir) .cases( { - f"walberla::stencil::Direction::{offset_to_direction_string(comm_dir)}": sfg.gpu_invoke( + Direction.from_offset(comm_dir): sfg.gpu_invoke( pack_kernels[comm_dir], stream=stream ) for comm_dir in comm_dirs @@ -189,7 +189,7 @@ class GpuPdfFieldPackInfo(CustomGenerator): sfg.switch(dir) .cases( { - f"walberla::stencil::Direction::{offset_to_direction_string(comm_dir)}": sfg.gpu_invoke( + Direction.from_offset(comm_dir): sfg.gpu_invoke( unpack_kernels[comm_dir], stream=stream ) for comm_dir in comm_dirs @@ -226,7 +226,12 @@ class GpuPdfFieldPackInfo(CustomGenerator): .inline() .const() .params(dir) - .returns(uint_t)(f"return {self._field.index_shape[0]};"), # FIXME: Wrong! + .returns(uint_t)( + sfg.switch(dir, autobreak=False).cases({ + Direction.from_offset(comm_dir): f"return {len(elems)};" + for comm_dir, elems in self._communication_sets.items() + }).default("return 0;") + ), ) ) diff --git a/tests/BasicLbmScenarios/CMakeLists.txt b/tests/BasicLbmScenarios/CMakeLists.txt index 58dfde5a5c2de2ce2901c0efd94b4eefcf11104e..e2a87497f6cbd3f855f0def1a9a726f791d6cde2 100644 --- a/tests/BasicLbmScenarios/CMakeLists.txt +++ b/tests/BasicLbmScenarios/CMakeLists.txt @@ -11,7 +11,9 @@ target_link_libraries( TestBasicLbmScenariosCPU PRIVATE walberla::core walberla: add_dependencies( SfgTests TestBasicLbmScenariosCPU ) foreach( TestID ${TestIDs} ) - add_test( NAME "BasicLbmScenarios - CPU - ${TestID}" COMMAND TestBasicLbmScenariosCPU ${TestID} ) +foreach( NumProcs 1 2 4 ) + add_test( NAME "BasicLbmScenarios - CPU - ${NumProcs} Processes - ${TestID}" COMMAND mpiexec -c ${NumProcs} TestBasicLbmScenariosCPU ${TestID} ) +endforeach() endforeach() @@ -39,6 +41,8 @@ if( $CACHE{WALBERLA_BUILD_WITH_HIP} ) add_dependencies( SfgTests TestBasicLbmScenariosHIP ) foreach( TestID ${TestIDs} ) - add_test( NAME "BasicLbmScenarios - HIP - ${TestID}" COMMAND TestBasicLbmScenariosHIP ${TestID} ) + foreach( NumProcs 1 2 4 ) + add_test( NAME "BasicLbmScenarios - HIP - ${NumProcs} Processes - ${TestID}" COMMAND mpiexec -c ${NumProcs} TestBasicLbmScenariosHIP ${TestID} ) + endforeach() endforeach() endif() diff --git a/tests/BasicLbmScenarios/TestBasicLbmScenarios.cpp b/tests/BasicLbmScenarios/TestBasicLbmScenarios.cpp index 974e5f28b914698aef91aa6c73604447616be0c9..e25e33a73ad607ca21deaade1b934a7cd4f898e1 100644 --- a/tests/BasicLbmScenarios/TestBasicLbmScenarios.cpp +++ b/tests/BasicLbmScenarios/TestBasicLbmScenarios.cpp @@ -25,8 +25,12 @@ using TestFunction = std::function< void(mpi::Environment&) >; */ void fullyPeriodic(mpi::Environment& env) { - SimDomain dom{ SimDomainBuilder{ - .blocks = { 1, 1, 1 }, .cellsPerBlock = { 32, 32, 32 }, .periodic = { true, true, true } } + uint_t numProcesses = uint_c(MPIManager::instance()->numProcesses()); + Vector3< uint_t > numBlocks{ math::getFactors3D(numProcesses) }; + + SimDomain dom{ SimDomainBuilder{ .blocks = { numBlocks[0], numBlocks[1], numBlocks[2] }, + .cellsPerBlock = { 16, 16, 16 }, + .periodic = { true, true, true } } .build() }; const Vector3< real_t > force{ 0.005, 0., 0. }; @@ -63,9 +67,12 @@ void fullyPeriodic(mpi::Environment& env) void mirroredHalfChannel(mpi::Environment& env) { size_t zCells{ 64 }; + uint_t numProcesses = uint_c(MPIManager::instance()->numProcesses()); + std::vector< uint_t > numBlocksXY{ math::getFactors(numProcesses, 2u) }; - SimDomain dom{ SimDomainBuilder{ - .blocks = { 1, 1, 1 }, .cellsPerBlock = { 4, 4, zCells }, .periodic = { true, true, false } } + SimDomain dom{ SimDomainBuilder{ .blocks = { numBlocksXY[0], numBlocksXY[1], 1 }, + .cellsPerBlock = { 4, 4, zCells }, + .periodic = { true, true, false } } .build() }; /* Hagen-Poiseuille-law in lattice units */ @@ -144,8 +151,10 @@ void mirroredHalfChannel(mpi::Environment& env) */ void freeSlipPipe(mpi::Environment& env) { + uint_t numProcesses = uint_c(MPIManager::instance()->numProcesses()); + SimDomain dom{ SimDomainBuilder{ - .blocks = { 1, 1, 1 }, .cellsPerBlock = { 4, 32, 32 }, .periodic = { true, false, false } } + .blocks = { numProcesses, 1, 1 }, .cellsPerBlock = { 4, 32, 32 }, .periodic = { true, false, false } } .build() }; const FlagUID fluidFlagUid{ "Fluid" }; @@ -265,7 +274,7 @@ int main(int argc, char** argv) if (auto entry = BasicLbmScenarios::TESTS.find(testId); entry != BasicLbmScenarios::TESTS.end()) { - std::get< BasicLbmScenarios::TestFunction >(*entry)(env); + std::get< BasicLbmScenarios::TestFunction > (*entry)(env); return EXIT_SUCCESS; }