diff --git a/runtime/domain/ParticleDataHanding.hpp b/runtime/domain/ParticleDataHanding.hpp new file mode 100644 index 0000000000000000000000000000000000000000..879b9ae83a943c94f7482de0d5c3abb0d89d7907 --- /dev/null +++ b/runtime/domain/ParticleDataHanding.hpp @@ -0,0 +1,123 @@ +#include <blockforest/BlockForest.h> +#include <blockforest/BlockDataHandling.h> + +namespace walberla { + +namespace internal { + +class ParticleDeleter { + friend bool operator==(const ParticleDeleter& lhs, const ParticleDeleter& rhs); + +public: + ParticleDeleter(const math::AABB& aabb) : aabb_(aabb) {} + ~ParticleDeleter() {} + +private: + math::AABB aabb_; +}; + +inline bool operator==(const ParticleDeleter& lhs, const ParticleDeleter& rhs) { + return lhs.aabb_ == rhs.aabb_; +} + +} // namespace internal + +class ParticleDataHandling : public blockforest::BlockDataHandling<internal::ParticleDeleter>{ + +public: + ParticleDataHandling() {} + virtual ~ParticleDataHandling() {} + + virtual internal::ParticleDeleter *initialize(IBlock *const block) WALBERLA_OVERRIDE { + return new internal::ParticleDeleter(block->getAABB()); + } + + virtual void serialize(IBlock *const block, const BlockDataID& id, mpi::SendBuffer& buffer) WALBERLA_OVERRIDE { + serializeImpl(static_cast<Block *const>(block), id, buffer, 0, false); + } + + virtual internal::ParticleDeleter* deserialize(IBlock *const block) WALBERLA_OVERRIDE { + return initialize(block); + } + + virtual void deserialize(IBlock *const block, const BlockDataID& id, mpi::RecvBuffer& buffer) WALBERLA_OVERRIDE { + deserializeImpl(block, id, buffer); + } + + virtual void serializeCoarseToFine(Block *const block, const BlockDataID& id, mpi::SendBuffer& buffer, const uint_t child) + WALBERLA_OVERRIDE { + serializeImpl(block, id, buffer, child, true); + } + + virtual void serializeFineToCoarse(Block *const block, const BlockDataID& id, mpi::SendBuffer& buffer) WALBERLA_OVERRIDE { + serializeImpl(block, id, buffer, 0, false); + } + + virtual internal::ParticleDeleter *deserializeCoarseToFine(Block *const block) WALBERLA_OVERRIDE { + return initialize(block); + } + + virtual internal::ParticleDeleter *deserializeFineToCoarse(Block *const block) WALBERLA_OVERRIDE { + return initialize(block); + } + + virtual void deserializeCoarseToFine(Block *const block, const BlockDataID& id, mpi::RecvBuffer& buffer) WALBERLA_OVERRIDE { + deserializeImpl(block, id, buffer); + } + + virtual void deserializeFineToCoarse(Block *const block, const BlockDataID& id, mpi::RecvBuffer& buffer, const uint_t) + WALBERLA_OVERRIDE { + deserializeImpl(block, id, buffer); + } + +private: + void serializeImpl(Block *const block, const BlockDataID&, mpi::SendBuffer& buffer, const uint_t child, bool check_child) { + auto ptr = buffer.allocate<uint_t>(); + double aabb_check[6]; + int nparticles; + + if(check_child) { + const auto child_id = BlockID(block->getId(), child); + const auto child_aabb = block->getForest().getAABBFromBlockId(child_id); + aabb_check[0] = child_aabb.xMin(); + aabb_check[1] = child_aabb.xMax(); + aabb_check[2] = child_aabb.yMin(); + aabb_check[3] = child_aabb.yMax(); + aabb_check[4] = child_aabb.zMin(); + aabb_check[5] = child_aabb.zMax(); + } else { + const auto aabb = block->getAABB(); + aabb_check[0] = aabb.xMin(); + aabb_check[1] = aabb.xMax(); + aabb_check[2] = aabb.yMin(); + aabb_check[3] = aabb.yMax(); + aabb_check[4] = aabb.zMin(); + aabb_check[5] = aabb.zMax(); + } + + nparticles = md_serialize_particles(aabb_check); + + for(int i = 0; i < nparticles * 7; ++i) { + buffer << md_get_send_buffer_value(i); + } + + *ptr = (uint_t) nparticles; + } + + void deserializeImpl(IBlock *const, const BlockDataID&, mpi::RecvBuffer& buffer) { + uint_t nparticles; + buffer >> nparticles; + + md_resize_recv_buffer_capacity((int) nparticles); + + for(int i = 0; i < (int) nparticles * 7; ++i) { + double v; + buffer >> v; + md_set_recv_buffer_value(i, v); + } + + md_deserialize_particles((int) nparticles); + } +}; + +} // namespace walberla \ No newline at end of file diff --git a/runtime/domain/block_forest.cpp b/runtime/domain/block_forest.cpp index cc6ec7fca84610b3fbec214e1d4de8ebcddad4bd..af30587cedd075b475353630a33060c625fbfdbd 100644 --- a/runtime/domain/block_forest.cpp +++ b/runtime/domain/block_forest.cpp @@ -15,18 +15,15 @@ #include "../pairs_common.hpp" #include "../devices/device.hpp" #include "regular_6d_stencil.hpp" -#include "MDDataHandling.h" +#include "ParticleDataHandling.h" namespace pairs { -void BlockForest::updateNeighborhood( - std::shared_ptr<BlockForest> forest, - blockforest::InfoCollection& info, - real_t *ranks, - real_t *naabbs, - real_t *aabbs) { - +void BlockForest::updateNeighborhood() { auto me = mpi::MPIManager::instance()->rank(); + this->nranks = 0; + this->total_aabbs = 0; + for(auto& iblock: *forest) { auto block = static_cast<blockforest::Block *>(&iblock); auto& block_info = info[block->getId()]; @@ -56,7 +53,6 @@ void BlockForest::updateNeighborhood( vec_ranks.clear(); vec_naabbs.clear(); vec_aabbs.clear(); - total_aabbs = 0; for(auto& nbh: neighborhood) { auto rank = nbh.first; @@ -71,16 +67,14 @@ void BlockForest::updateNeighborhood( vec_aabbs.push_back(aabb.yMax()); vec_aabbs.push_back(aabb.zMin()); vec_aabbs.push_back(aabb.zMax()); - total_aabbs++; + this->total_aabbs++; } + + this->nranks++; } +} - // TODO: delegate these next lines to functions (getRuntimeInt?) - *nranks = nranks; - *total_aabbs = total_aabbs; - ps->copyToDevice(aabbs); -} void BlockForest::copyRuntimeArray(const std::string& name, void *dest, const int size) { void *src = name.compare('ranks') ? vec_ranks.data() : @@ -91,8 +85,8 @@ void BlockForest::copyRuntimeArray(const std::string& name, void *dest, const in name.compare('subdom') ? subdom; bool is_real = name.compare('aabbs') || name.compare('subdom'); - int tsize = (is_real) ? sizeof(real_t) : sizeof(int); - std::memcpy(dest, src, size * tsize) + int tsize = is_real ? sizeof(real_t) : sizeof(int); + std::memcpy(dest, src, size * tsize); } /* @@ -121,12 +115,7 @@ void BlockForest::copyRuntimeArray(const std::string& name, void *dest, const in }) as u32; */ -void BlockForest::updateWeights( - shared_ptr<BlockForest> forest, - blockforest::InfoCollection& info, - real_t *position, - int nparticles) { - +void BlockForest::updateWeights(real_t *position, int nparticles) { mpi::BufferSystem bs(mpi::MPIManager::instance()->comm(), 756); info.clear(); @@ -229,20 +218,30 @@ uint_t BlockForest::getInitialRefinementLevel(uint_t num_processes) { return refinementLevel; } -void BlockForest::getBlockForestAABB(double (&rank_aabb)[6]) { +void BlockForest::setBoundingBox() { auto aabb_union = forest->begin()->getAABB(); - for(auto& iblock: *forest) { auto block = static_cast<blockforest::Block *>(&iblock); aabb_union.merge(block->getAABB()); } - rank_aabb[0] = aabb_union.xMin(); - rank_aabb[1] = aabb_union.xMax(); - rank_aabb[2] = aabb_union.yMin(); - rank_aabb[3] = aabb_union.yMax(); - rank_aabb[4] = aabb_union.zMin(); - rank_aabb[5] = aabb_union.zMax(); + subdom[0] = aabb_union.xMin(); + subdom[1] = aabb_union.xMax(); + subdom[2] = aabb_union.yMin(); + subdom[3] = aabb_union.yMax(); + subdom[4] = aabb_union.zMin(); + subdom[5] = aabb_union.zMax(); +} + +void BlockForest::rebalance() { + if(balance_workload) { + this->updateWeights(); + forest->refresh(); + } + + this->setBoundingBox(); + this->updateWeights(); + this->updateNeighborhood(); } void BlockForest::initialize(int *argc, char ***argv) { @@ -260,15 +259,81 @@ void BlockForest::initialize(int *argc, char ***argv) { auto block_config = use_load_balancing ? Vector3<uint_t>(1, 1, 1) : getBlockConfig(procs, gridsize[0], gridsize[1], gridsize[2]); auto ref_level = use_load_balancing ? getInitialRefinementLevel(procs) : 0; - auto forest = blockforest::createBlockForest( + forest = blockforest::createBlockForest( domain, block_config, Vector3<bool>(true, true, true), procs, ref_level); - auto is_within_domain = bind(isWithinBlockForest, _1, _2, _3, forest); - auto info = make_shared<blockforest::InfoCollection>(); - getBlockForestAABB(forest, rank_aabb); + info = make_shared<blockforest::InfoCollection>(); + this->setBoundingBox(); + + if(balance_workload) { + this->initializeWorkloadBalancer(); + } +} + +void BlockForest::initializeWorkloadBalancer() { + const std::string algorithm = "morton"; + real_t baseWeight = 1.0; + real_t metisipc2redist = 1.0; + size_t regridMin = 10; + size_t regridMax = 100; + int maxBlocksPerProcess = 100; + string metisAlgorithm = "none"; + string metisWeightsToUse = "none"; + string metisEdgeSource = "none"; + + forest->recalculateBlockLevelsInRefresh(true); + forest->alwaysRebalanceInRefresh(true); + forest->reevaluateMinTargetLevelsAfterForcedRefinement(true); + forest->allowRefreshChangingDepth(true); + + forest->allowMultipleRefreshCycles(false); + forest->checkForEarlyOutInRefresh(false); + forest->checkForLateOutInRefresh(false); + forest->setRefreshMinTargetLevelDeterminationFunction(pe::amr::MinMaxLevelDetermination(info, regridMin, regridMax)); + + for_each(algorithm.begin(), algorithm.end(), [](char& c) { c = (char) ::tolower(c); }); + + if(algorithm == "morton") { + forest->setRefreshPhantomBlockDataAssignmentFunction(pe::amr::WeightAssignmentFunctor(info, baseWeight)); + forest->setRefreshPhantomBlockDataPackFunction(pe::amr::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor()); + forest->setRefreshPhantomBlockDataUnpackFunction(pe::amr::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor()); + + auto prepFunc = blockforest::DynamicCurveBalance<pe::amr::WeightAssignmentFunctor::PhantomBlockWeight>(false, true, false); + + prepFunc.setMaxBlocksPerProcess(maxBlocksPerProcess); + forest->setRefreshPhantomBlockMigrationPreparationFunction(prepFunc); + } else if(algorithm == "hilbert") { + forest->setRefreshPhantomBlockDataAssignmentFunction(pe::amr::WeightAssignmentFunctor(info, baseWeight)); + forest->setRefreshPhantomBlockDataPackFunction(pe::amr::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor()); + forest->setRefreshPhantomBlockDataUnpackFunction(pe::amr::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor()); + + auto prepFunc = blockforest::DynamicCurveBalance<pe::amr::WeightAssignmentFunctor::PhantomBlockWeight>(true, true, false); + + prepFunc.setMaxBlocksPerProcess(maxBlocksPerProcess); + forest->setRefreshPhantomBlockMigrationPreparationFunction(prepFunc); + } else if(algorithm == "metis") { + forest->setRefreshPhantomBlockDataAssignmentFunction(pe::amr::MetisAssignmentFunctor(info, baseWeight)); + forest->setRefreshPhantomBlockDataPackFunction(pe::amr::MetisAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor()); + forest->setRefreshPhantomBlockDataUnpackFunction(pe::amr::MetisAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor()); + + auto alg = blockforest::DynamicParMetis::stringToAlgorithm(metisAlgorithm); + auto vWeight = blockforest::DynamicParMetis::stringToWeightsToUse(metisWeightsToUse); + auto eWeight = blockforest::DynamicParMetis::stringToEdgeSource(metisEdgeSource); + auto prepFunc = blockforest::DynamicParMetis(alg, vWeight, eWeight); + + prepFunc.setipc2redist(metisipc2redist); + forest->setRefreshPhantomBlockMigrationPreparationFunction(prepFunc); + } else if(algorithm == "diffusive") { + forest->setRefreshPhantomBlockDataAssignmentFunction(pe::amr::WeightAssignmentFunctor(info, baseWeight)); + forest->setRefreshPhantomBlockDataPackFunction(pe::amr::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor()); + forest->setRefreshPhantomBlockDataUnpackFunction(pe::amr::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor()); + + auto prepFunc = blockforest::DynamicDiffusionBalance<pe::amr::WeightAssignmentFunctor::PhantomBlockWeight>(1, 1, false); + + forest->setRefreshPhantomBlockMigrationPreparationFunction(prepFunc); + } - //subdom_min[d] = this->grid_min[d] + rank_length[d] * (real_t)myloc[d]; - //subdom_max[d] = subdom_min[d] + rank_length[d]; + forest->addBlockData(make_shared<ParticleDataHandling>(), "Interface"); } void BlockForest::finalize() { diff --git a/runtime/domain/block_forest.hpp b/runtime/domain/block_forest.hpp index e5f4a68745798b2d8ec0a55909b53ac47f85401e..1a4034928bd2668ad997243f9c5e1eeca6968cac 100644 --- a/runtime/domain/block_forest.hpp +++ b/runtime/domain/block_forest.hpp @@ -12,6 +12,7 @@ namespace pairs { class BlockForest : public DomainPartitioner { private: std::shared_ptr<BlockForest> forest; + blockforest::InfoCollection info; std::map<uint_t, std::vector<math::AABB>> neighborhood; std::map<uint_t, std::vector<BlockID>> blocks_pushed; std::vector<int> ranks; @@ -20,29 +21,38 @@ private: real_t *subdom_min; real_t *subdom_max; int world_size, rank, total_aabbs; + bool balance_workload; public: BlockForest( real_t xmin, real_t xmax, real_t ymin, real_t ymax, real_t zmin, real_t zmax) : DomainPartitioner(xmin, xmax, ymin, ymax, zmin, zmax) { - subdom_min = new real_t[ndims]; - subdom_max = new real_t[ndims]; + subdom = new real_t[ndims * 2]; } ~BlockForest() { - delete[] subdom_min; - delete[] subdom_max; + delete[] subdom; } - void setConfig(); void setBoundingBox(); void initialize(int *argc, char ***argv); void finalize(); int getWorldSize() const { return world_size; } int getRank() const { return rank; } + int getNumberOfNeighborRanks() { return this->nranks; } + int getNumberOfNeighborAABBs() { return this->naabbs; } + + void initializeWorkloadBalancer(); + void updateNeighborhood(); + void updateWeights(real_t *position, int nparticles); + Vector3<uint_t> getBlockConfig(uint_t num_processes, uint_t nx, uint_t ny, uint_t nz); + uint_t getInitialRefinementLevel(uint_t num_processes); + void setBoundingBox(); + void rebalance(); + int isWithinSubdomain(real_t x, real_t y, real_t z); - void fillArrays(int *neighbor_ranks, int *pbc, real_t *subdom); + void copyRuntimeArray(const std::string& name, void *dest, const int size); void communicateSizes(int dim, const int *send_sizes, int *recv_sizes); void communicateData( int dim, int elem_size, diff --git a/runtime/domain/regular_6d_stencil.hpp b/runtime/domain/regular_6d_stencil.hpp index 330af65a6ccb140cef8d283eac3ab183d0503c45..4aa95cd9cb20b7027f032442345124f3cd385d05 100644 --- a/runtime/domain/regular_6d_stencil.hpp +++ b/runtime/domain/regular_6d_stencil.hpp @@ -52,8 +52,12 @@ public: void setBoundingBox(); void initialize(int *argc, char ***argv); void finalize(); + int getWorldSize() const { return world_size; } int getRank() const { return rank; } + int getNumberOfNeighborRanks() { return 6; } + int getNumberOfNeighborAABBs() { return 6; } + int isWithinSubdomain(real_t x, real_t y, real_t z); void fillArrays(int *neighbor_ranks, int *pbc, real_t *subdom); void communicateSizes(int dim, const int *send_sizes, int *recv_sizes); diff --git a/runtime/pairs.hpp b/runtime/pairs.hpp index 94a49fa0b954fa28f1251c330ccfae87dab918a8..000e6c47383a2a6996de27d89a15a4bc8702b3b7 100644 --- a/runtime/pairs.hpp +++ b/runtime/pairs.hpp @@ -283,8 +283,9 @@ public: const real_t *send_buf, const int *contact_soffsets, const int *nsend_contact, real_t *recv_buf, int *contact_roffsets, int *nrecv_contact); - void PairsSimulation::copyRuntimeArray( - const std::string& name, const void *dest, const int size); + void copyRuntimeArray(const std::string& name, const void *dest, const int size); + int getNumberOfNeighborRanks() { return this->getDomainPartitioner()->getNumberOfNeighborRanks(); } + int getNumberOfNeighborAABBs() { return this->getDomainPartitioner()->getNumberOfNeighborAABBs(); } // Device functions void sync() { device_synchronize(); } diff --git a/src/pairs/sim/domain_partitioning.py b/src/pairs/sim/domain_partitioning.py index 47e91b360ea2d1ad0401352af4bcf24108d2a8df..aae6dfc060d45fb2bac129ca169fe9296fd64cd5 100644 --- a/src/pairs/sim/domain_partitioning.py +++ b/src/pairs/sim/domain_partitioning.py @@ -100,13 +100,27 @@ class BlockForest: def initialize(self): grid_array = [(self.sim.grid.min(d), self.sim.grid.max(d)) for d in range(self.sim.ndims())] Call_Void(self.sim, "pairs->initDomain", [param for delim in grid_array for param in delim]) - # TODO: Check nranks_capacity and aabb_capacity and resize arrays if needed - Call_Void(self.sim, "pairs->copyRuntimeArray", ['ranks', self.ranks, self.nranks_capacity]) - Call_Void(self.sim, "pairs->copyRuntimeArray", ['naabbs', self.naabbs, self.nranks_capacity]) - Call_Void(self.sim, "pairs->copyRuntimeArray", ['rank_offsets', self.rank_offsets, self.nranks_capacity]) - Call_Void(self.sim, "pairs->copyRuntimeArray", ['pbc', self.pbc, self.aabb_capacity * 3]) - Call_Void(self.sim, "pairs->copyRuntimeArray", ['aabbs', self.aabbs, self.aabb_capacity * 6]) - Call_Void(self.sim, "pairs->copyRuntimeArray", ['subdom', self.subdom, sim.ndims() * 2]) + + Assign(self.sim, self.nranks, Call_Int(self.sim, "pairs->getNumberOfNeighborRanks", [])) + Assign(self.sim, self.naabbs, Call_Int(self.sim, "pairs->getNumberOfNeighborAABBs", [])) + + for _ in Filter(self.sim, self.nranks_capacity < self.nranks): + Assign(self.sim, self.nranks_capacity, self.nranks + 10) + self.ranks.realloc() + self.naabbs.realloc() + self.offsets.realloc() + + for _ in Filter(self.sim, self.aabb_capacity < self.naabbs): + Assign(self.sim, self.aabb_capacity, self.naabbs + 20) + self.pbc.realloc() + self.aabbs.realloc() + + Call_Void(self.sim, "pairs->copyRuntimeArray", ['ranks', self.ranks, self.nranks]) + Call_Void(self.sim, "pairs->copyRuntimeArray", ['naabbs', self.naabbs, self.nranks]) + Call_Void(self.sim, "pairs->copyRuntimeArray", ['rank_offsets', self.rank_offsets, self.nranks]) + Call_Void(self.sim, "pairs->copyRuntimeArray", ['pbc', self.pbc, self.naabbs * 3]) + Call_Void(self.sim, "pairs->copyRuntimeArray", ['aabbs', self.aabbs, self.naabbs * 6]) + Call_Void(self.sim, "pairs->copyRuntimeArray", ['subdom', self.subdom, self.sim.ndims() * 2]) def ghost_particles(self, step, position, offset=0.0): # Particles with one of the following flags are ignored