diff --git a/FindwaLBerla.cmake b/FindwaLBerla.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..1bbb2d2c6d9f6c7b20cc3cc98d02e627a686f90f
--- /dev/null
+++ b/FindwaLBerla.cmake
@@ -0,0 +1,15 @@
+set( WALBERLA_DIR WALBERLA_DIR-NOTFOUND CACHE PATH "waLBerla path" )
+
+if ( WALBERLA_DIR )
+    # WALBERLA_DIR has to point to the waLBerla source directory
+    # this command builds waLBerla (again) in the current build directory in the subfolder "walberla" (second argument)
+    add_subdirectory( ${WALBERLA_DIR} walberla )
+
+    waLBerla_import()
+    # Adds the 'src' and 'tests' directory of current app
+    list( APPEND WALBERLA_MODULE_DIRS "${CMAKE_SOURCE_DIR}/src" "${CMAKE_SOURCE_DIR}/tests" )
+    list( REMOVE_DUPLICATES WALBERLA_MODULE_DIRS )
+    set ( WALBERLA_MODULE_DIRS ${WALBERLA_MODULE_DIRS} CACHE INTERNAL "All folders that contain modules or tests" )
+else()
+    message( FATAL_ERROR "waLBerla not found - Use 'cmake -DWALBERLA_DIR=path_to_waLBerla_sources pathToApplicationSources' " )
+endif()
diff --git a/examples/md.py b/examples/md.py
index 574eb2e562517415639493061d3ee0e64a6ae979..30a669a839e26869071d56b74c282f9124619667 100644
--- a/examples/md.py
+++ b/examples/md.py
@@ -51,7 +51,8 @@ psim.add_feature_property('type', 'epsilon', pairs.real(), [sigma for i in range
 psim.add_feature_property('type', 'sigma6', pairs.real(), [epsilon for i in range(ntypes * ntypes)])
 psim.copper_fcc_lattice(nx, ny, nz, rho, temp, ntypes)
 
-psim.set_domain_partitioner(pairs.regular_domain_partitioner())
+psim.set_domain_partitioner(pairs.block_forest())
+#psim.set_domain_partitioner(pairs.regular_domain_partitioner())
 
 psim.compute_thermo(100)
 psim.reneighbor_every(20)
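The example above now selects the new waLBerla block-forest partitioner and keeps the previous regular partitioner as a commented-out fallback. A minimal sketch of that user-facing switch (Python; psim is assumed to be the simulation object built earlier in examples/md.py, and both calls are exactly the ones in the hunk):

    import pairs

    def choose_partitioner(psim, use_block_forest=True):
        # Both helpers return a DomainPartitioners value (see src/pairs/__init__.py below).
        if use_block_forest:
            psim.set_domain_partitioner(pairs.block_forest())
        else:
            psim.set_domain_partitioner(pairs.regular_domain_partitioner())
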
diff --git a/runtime/domain/block_forest.cpp b/runtime/domain/block_forest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ed6f45da8b8b80164a77646462d90e6830a26f55
--- /dev/null
+++ b/runtime/domain/block_forest.cpp
@@ -0,0 +1,473 @@
+#include <mpi.h>
+#include <vector>
+//---
+#include <blockforest/BlockForest.h>
+#include <blockforest/Initialization.h>
+#include <blockforest/loadbalancing/DynamicCurve.h>
+#include <blockforest/loadbalancing/DynamicDiffusive.h>
+#include <blockforest/loadbalancing/DynamicParMetis.h>
+#include <blockforest/loadbalancing/InfoCollection.h>
+#include <blockforest/loadbalancing/PODPhantomData.h>
+#include <pe/amr/level_determination/MinMaxLevelDetermination.h>
+#include <pe/amr/weight_assignment/MetisAssignmentFunctor.h>
+#include <pe/amr/weight_assignment/WeightAssignmentFunctor.h>
+//---
+#include "../pairs_common.hpp"
+#include "../devices/device.hpp"
+#include "regular_6d_stencil.hpp"
+#include "MDDataHandling.h"
+
+namespace pairs {
+
+void BlockForest::updateNeighborhood(
+    std::shared_ptr<BlockForest> forest,
+    blockforest::InfoCollection& info,
+    real_t *ranks,
+    real_t *naabbs,
+    real_t *aabbs) {
+
+    auto me = mpi::MPIManager::instance()->rank();
+    for(auto& iblock: *forest) {
+        auto block = static_cast<blockforest::Block *>(&iblock);
+        auto& block_info = info[block->getId()];
+
+        if(block_info.computationalWeight > 0) {
+            for(uint neigh = 0; neigh < block->getNeighborhoodSize(); ++neigh) {
+                auto neighbor_rank = int_c(block->getNeighborProcess(neigh));
+
+                if(neighbor_rank != me) {
+                    const BlockID& neighbor_block = block->getNeighborId(neigh);
+                    math::AABB neighbor_aabb = block->getNeighborAABB(neigh);
+                    auto neighbor_info = info[neighbor_block];
+                    auto begin = blocks_pushed[neighbor_rank].begin();
+                    auto end = blocks_pushed[neighbor_rank].end();
+                    if(neighbor_info.computationalWeight > 0 &&
+                       find_if(begin, end, [&neighbor_block](const auto &nbh) { return nbh == neighbor_block; }) == end) {
+
+                        neighborhood[neighbor_rank].push_back(neighbor_aabb);
+                        blocks_pushed[neighbor_rank].push_back(neighbor_block);
+                    }
+                }
+            }
+        }
+    }
+
+    vec_ranks.clear();
+    vec_naabbs.clear();
+    vec_aabbs.clear();
+    total_aabbs = 0;
+
+    for(auto& nbh: neighborhood) {
+        auto rank = nbh.first;
+        auto aabb_list = nbh.second;
+        vec_ranks.push_back((int) rank);
+        vec_naabbs.push_back((int) aabb_list.size());
+
+        for(auto &aabb: aabb_list) {
+            vec_aabbs.push_back(aabb.xMin());
+            vec_aabbs.push_back(aabb.xMax());
+            vec_aabbs.push_back(aabb.yMin());
+            vec_aabbs.push_back(aabb.yMax());
+            vec_aabbs.push_back(aabb.zMin());
+            vec_aabbs.push_back(aabb.zMax());
+            total_aabbs++;
+        }
+    }
+
+    *nranks = nranks;
+    if(nranks > *rank_capacity) {
+        // reallocateArray?
+        const int new_capacity = nranks + 10;
+        delete[] ranks;
+        delete[] naabbs;
+        delete[] offsets;
+        ranks = new int[new_capacity];
+        naabbs = new int[new_capacity];
+        offsets = new int[new_capacity];
+        *rank_capacity = new_capacity;
+    }
+
+    if(total_aabbs > *aabb_capacity) {
+        const int new_capacity = total_aabbs + 10;
+        aabbs = new real_t[new_capacity * 6];
+        *aabb_capacity = new_capacity;
+    }
+
+    int offset = 0;
+    for(int i = 0; i < nranks; i++) {
+        ranks[i] = vec_ranks.data()[i];
+        naabbs[i] = vec_naabbs.data()[i];
+        offsets[i] = offset;
+        offset += naabbs[i];
+    }
+
+    for(int i = 0; i < total_aabbs * 6; i++) {
+        aabbs[i] = vec_aabbs.data()[i];
+    }
+
+    ps->copyToDevice(aabbs);
+}
+
+/*
+    extern fn md_compute_boundary_weights(
+        xmin: real_t, xmax: real_t, ymin: real_t, ymax: real_t, zmin: real_t, zmax: real_t,
+        computational_weight: &mut u32, communication_weight: &mut u32) -> () {
+
+        let grid = grid_;
+        let particle = make_particle(grid, array_dev, ParticleDataLayout(), null_layout());
+        let sum = @|a: i32, b: i32| { a + b };
+        let aabb = AABB {
+            xmin: xmin,
+            xmax: xmax,
+            ymin: ymin,
+            ymax: ymax,
+            zmin: zmin,
+            zmax: zmax
+        };
+
+        *computational_weight = reduce_i32(grid.nparticles, 0, sum, |i| {
+            select(is_within_domain(particle.get_position(i), aabb), 1, 0)
+        }) as u32;
+
+        *communication_weight = reduce_i32(grid.nghost, 0, sum, |i| {
+            select(is_within_domain(particle.get_position(grid.nparticles + i), aabb), 1, 0)
+        }) as u32;
+*/
+
+void BlockForest::updateWeights(
+    shared_ptr<BlockForest> forest,
+    blockforest::InfoCollection& info,
+    real_t *position,
+    int nparticles) {
+
+    mpi::BufferSystem bs(mpi::MPIManager::instance()->comm(), 756);
+
+    info.clear();
+    for(auto& iblock: *forest) {
+        auto block = static_cast<blockforest::Block *>(&iblock);
+        auto aabb = block->getAABB();
+        auto& block_info = info[block->getId()];
+
+        pairs->callModule(
+            "computeBoundaryWeights",
+            aabb.xMin(), aabb.xMax(), aabb.yMin(), aabb.yMax(), aabb.zMin(), aabb.zMax(),
+            &(block_info.computationalWeight), &(block_info.communicationWeight));
+
+        for(uint_t branch = 0; branch < 8; ++branch) {
+            const auto b_id = BlockID(block->getId(), branch);
+            const auto b_aabb = forest->getAABBFromBlockId(b_id);
+            auto& b_info = info[b_id];
+
+            pairs->callModule(
+                "computeBoundaryWeights",
+                b_aabb.xMin(), b_aabb.xMax(), b_aabb.yMin(), b_aabb.yMax(), b_aabb.zMin(), b_aabb.zMax(),
+                &(b_info.computationalWeight), &(b_info.communicationWeight));
+        }
+    }
+
+    for(auto& iblock: *forest) {
+        auto block = static_cast<blockforest::Block *>(&iblock);
+        auto& block_info = info[block->getId()];
+
+        for(uint_t neigh = 0; neigh < block->getNeighborhoodSize(); ++neigh) {
+            bs.sendBuffer(block->getNeighborProcess(neigh)) <<
+                blockforest::InfoCollection::value_type(block->getId(), block_info);
+        }
+
+        for(uint_t branch = 0; branch < 8; ++branch) {
+            const auto b_id = BlockID(block->getId(), branch);
+            auto& b_info = info[b_id];
+
+            for(uint_t neigh = 0; neigh < block->getNeighborhoodSize(); ++neigh) {
+                bs.sendBuffer(block->getNeighborProcess(neigh)) <<
+                    blockforest::InfoCollection::value_type(b_id, b_info);
+            }
+        }
+    }
+
+    bs.setReceiverInfoFromSendBufferState(false, true);
+    bs.sendAll();
+
+    for(auto recv = bs.begin(); recv != bs.end(); ++recv) {
+        while(!recv.buffer().isEmpty()) {
+            blockforest::InfoCollectionPair val;
+            recv.buffer() >> val;
+            info.insert(val);
+        }
+    }
+}
+
+Vector3<uint_t> BlockForest::getBlockConfig(uint_t num_processes, uint_t nx, uint_t ny, uint_t nz) {
+    const uint_t bx_factor = 1;
+    const uint_t by_factor = 1;
+    const uint_t bz_factor = 1;
+    const uint_t ax = nx * ny;
+    const uint_t ay = nx * nz;
+    const uint_t az = ny * nz;
+
+    uint_t bestsurf = 2 * (ax + ay + az);
+    uint_t x = 1;
+    uint_t y = 1;
+    uint_t z = 1;
+
+    for(uint_t i = 1; i < num_processes; ++i) {
+        if(num_processes % i == 0) {
+            const uint_t rem_yz = num_processes / i;
+
+            for(uint_t j = 1; j < rem_yz; ++j) {
+                if(rem_yz % j == 0) {
+                    const uint_t k = rem_yz / j;
+                    const uint_t surf = (ax / i / j) + (ay / i / k) + (az / j / k);
+
+                    if(surf < bestsurf) {
+                        x = i, y = j, z = k;
+                        bestsurf = surf;
+                    }
+                }
+            }
+        }
+    }
+
+    return Vector3<uint_t>(x * bx_factor, y * by_factor, z * bz_factor);
+}
+
+uint_t BlockForest::getInitialRefinementLevel(uint_t num_processes) {
+    uint_t splitFactor = 8;
+    uint_t blocks = splitFactor;
+    uint_t refinementLevel = 1;
+
+    while(blocks < num_processes) {
+        refinementLevel++;
+        blocks *= splitFactor;
+    }
+
+    return refinementLevel;
+}
+
+void BlockForest::getBlockForestAABB(double (&rank_aabb)[6]) {
+    auto aabb_union = forest->begin()->getAABB();
+
+    for(auto& iblock: *forest) {
+        auto block = static_cast<blockforest::Block *>(&iblock);
+        aabb_union.merge(block->getAABB());
+    }
+
+    rank_aabb[0] = aabb_union.xMin();
+    rank_aabb[1] = aabb_union.xMax();
+    rank_aabb[2] = aabb_union.yMin();
+    rank_aabb[3] = aabb_union.yMax();
+    rank_aabb[4] = aabb_union.zMin();
+    rank_aabb[5] = aabb_union.zMax();
+}
+
+void BlockForest::setConfig() {
+    auto mpiManager = mpi::MPIManager::instance();
+    mpiManager->initializeMPI(&argc, &argv);
+    mpiManager->useWorldComm();
+
+    math::AABB domain(xmin, ymin, zmin, xmax, ymax, zmax);
+    int gridsize[3] = {32, 32, 32};
+    auto procs = mpiManager->numProcesses();
+    auto block_config = use_load_balancing ? Vector3<uint_t>(1, 1, 1) : getBlockConfig(procs, gridsize[0], gridsize[1], gridsize[2]);
+    auto ref_level = use_load_balancing ? getInitialRefinementLevel(procs) : 0;
+
+    auto forest = blockforest::createBlockForest(
+        domain, block_config, Vector3<bool>(true, true, true), procs, ref_level);
+
+    auto is_within_domain = bind(isWithinBlockForest, _1, _2, _3, forest);
+    auto info = make_shared<blockforest::InfoCollection>();
+    getBlockForestAABB(forest, rank_aabb);
+}
+
+void BlockForest::setBoundingBox() {
+    MPI_Comm cartesian;
+    int *myloc = new int[ndims];
+    int *periods = new int[ndims];
+    real_t *rank_length = new real_t[ndims];
+    int reorder = 0;
+
+    for(int d = 0; d < ndims; d++) {
+        periods[d] = 1;
+        rank_length[d] = (this->grid_max[d] - this->grid_min[d]) / (real_t) nranks[d];
+    }
+
+    MPI_Cart_create(MPI_COMM_WORLD, ndims, nranks, periods, reorder, &cartesian);
+    MPI_Cart_get(cartesian, ndims, nranks, periods, myloc);
+    for(int d = 0; d < ndims; d++) {
+        MPI_Cart_shift(cartesian, d, 1, &(prev[d]), &(next[d]));
+        pbc_prev[d] = (myloc[d] == 0) ? 1 : 0;
+        pbc_next[d] = (myloc[d] == nranks[d] - 1) ? -1 : 0;
+        subdom_min[d] = this->grid_min[d] + rank_length[d] * (real_t)myloc[d];
+        subdom_max[d] = subdom_min[d] + rank_length[d];
+    }
+
+    delete[] myloc;
+    delete[] periods;
+    delete[] rank_length;
+    MPI_Comm_free(&cartesian);
+}
+
+void BlockForest::initialize(int *argc, char ***argv) {
+    MPI_Init(argc, argv);
+    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    this->setConfig();
+    this->setBoundingBox();
+}
+
+void BlockForest::finalize() {
+    MPI_Finalize();
+}
+
+bool BlockForest::isWithinSubdomain(real_t x, real_t y, real_t z) {
+    for(auto& iblock: *forest) {
+        auto block = static_cast<blockforest::Block *>(&iblock);
+
+        if(block->getAABB().contains(x, y, z)) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+void BlockForest::fillArrays(int *neighbor_ranks, int *pbc, real_t *subdom) {
+    for(int d = 0; d < ndims; d++) {
+        neighbor_ranks[d * 2 + 0] = prev[d];
+        neighbor_ranks[d * 2 + 1] = next[d];
+        pbc[d * 2 + 0] = pbc_prev[d];
+        pbc[d * 2 + 1] = pbc_next[d];
+        subdom[d * 2 + 0] = subdom_min[d];
+        subdom[d * 2 + 1] = subdom_max[d];
+    }
+}
+
+void BlockForest::communicateSizes(int dim, const int *send_sizes, int *recv_sizes) {
+    if(prev[dim] != rank) {
+        MPI_Send(&send_sizes[dim * 2 + 0], 1, MPI_INT, prev[dim], 0, MPI_COMM_WORLD);
+        MPI_Recv(&recv_sizes[dim * 2 + 0], 1, MPI_INT, next[dim], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    } else {
+        recv_sizes[dim * 2 + 0] = send_sizes[dim * 2 + 0];
+    }
+
+    if(next[dim] != rank) {
+        MPI_Send(&send_sizes[dim * 2 + 1], 1, MPI_INT, next[dim], 0, MPI_COMM_WORLD);
+        MPI_Recv(&recv_sizes[dim * 2 + 1], 1, MPI_INT, prev[dim], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    } else {
+        recv_sizes[dim * 2 + 1] = send_sizes[dim * 2 + 1];
+    }
+}
+
+void BlockForest::communicateData(
+    int dim, int elem_size,
+    const real_t *send_buf, const int *send_offsets, const int *nsend,
+    real_t *recv_buf, const int *recv_offsets, const int *nrecv) {
+
+    //MPI_Request recv_requests[2];
+    //MPI_Request send_requests[2];
+    const real_t *send_prev = &send_buf[send_offsets[dim * 2 + 0] * elem_size];
+    const real_t *send_next = &send_buf[send_offsets[dim * 2 + 1] * elem_size];
+    real_t *recv_prev = &recv_buf[recv_offsets[dim * 2 + 0] * elem_size];
+    real_t *recv_next = &recv_buf[recv_offsets[dim * 2 + 1] * elem_size];
+
+    if(prev[dim] != rank) {
+        MPI_Sendrecv(
+            send_prev, nsend[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0,
+            recv_prev, nrecv[dim * 2 + 0] * elem_size, MPI_DOUBLE, next[dim], 0,
+            MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+        /*
+        MPI_Irecv(
+            recv_prev, nrecv[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0,
+            MPI_COMM_WORLD, &recv_requests[0]);
+
+        MPI_Isend(
+            send_prev, nsend[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0,
+            MPI_COMM_WORLD, &send_requests[0]);
+        */
+    } else {
+        pairs::copy_in_device(recv_prev, send_prev, nsend[dim * 2 + 0] * elem_size * sizeof(real_t));
+    }
+
+    if(next[dim] != rank) {
+        MPI_Sendrecv(
+            send_next, nsend[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0,
+            recv_next, nrecv[dim * 2 + 1] * elem_size, MPI_DOUBLE, prev[dim], 0,
+            MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+        /*
+        MPI_Irecv(
+            recv_next, nrecv[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0,
+            MPI_COMM_WORLD, &recv_requests[1]);
+
+        MPI_Isend(
+            send_next, nsend[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0,
+            MPI_COMM_WORLD, &send_requests[1]);
+        */
+    } else {
+        pairs::copy_in_device(recv_next, send_next, nsend[dim * 2 + 1] * elem_size * sizeof(real_t));
+    }
+
+    //MPI_Waitall(2, recv_requests, MPI_STATUSES_IGNORE);
+    //MPI_Waitall(2, send_requests, MPI_STATUSES_IGNORE);
+}
+
+void BlockForest::communicateAllData(
+    int ndims, int elem_size,
+    const real_t *send_buf, const int *send_offsets, const int *nsend,
+    real_t *recv_buf, const int *recv_offsets, const int *nrecv) {
+
+    //std::vector<MPI_Request> send_requests(ndims * 2, MPI_REQUEST_NULL);
+    //std::vector<MPI_Request> recv_requests(ndims * 2, MPI_REQUEST_NULL);
+
+    for (int d = 0; d < ndims; d++) {
+        const real_t *send_prev = &send_buf[send_offsets[d * 2 + 0] * elem_size];
+        const real_t *send_next = &send_buf[send_offsets[d * 2 + 1] * elem_size];
+        real_t *recv_prev = &recv_buf[recv_offsets[d * 2 + 0] * elem_size];
+        real_t *recv_next = &recv_buf[recv_offsets[d * 2 + 1] * elem_size];
+
+        if (prev[d] != rank) {
+            MPI_Sendrecv(
+                send_prev, nsend[d * 2 + 0] * elem_size, MPI_DOUBLE, prev[d], 0,
+                recv_prev, nrecv[d * 2 + 0] * elem_size, MPI_DOUBLE, next[d], 0,
+                MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+            /*
+            MPI_Isend(
+                send_prev, nsend[d * 2 + 0] * elem_size, MPI_DOUBLE, prev[d], 0,
+                MPI_COMM_WORLD, &send_requests[d * 2 + 0]);
+
+            MPI_Irecv(
+                recv_prev, nrecv[d * 2 + 0] * elem_size, MPI_DOUBLE, prev[d], 0,
+                MPI_COMM_WORLD, &recv_requests[d * 2 + 0]);
+            */
+        } else {
+            pairs::copy_in_device(recv_prev, send_prev, nsend[d * 2 + 0] * elem_size * sizeof(real_t));
+        }
+
+        if (next[d] != rank) {
+            MPI_Sendrecv(
+                send_next, nsend[d * 2 + 1] * elem_size, MPI_DOUBLE, next[d], 0,
+                recv_next, nrecv[d * 2 + 1] * elem_size, MPI_DOUBLE, prev[d], 0,
+                MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+            /*
+            MPI_Isend(
+                send_next, nsend[d * 2 + 1] * elem_size, MPI_DOUBLE, next[d], 0,
+                MPI_COMM_WORLD, &send_requests[d * 2 + 1]);
+
+            MPI_Irecv(
+                recv_next, nrecv[d * 2 + 1] * elem_size, MPI_DOUBLE, next[d], 0,
+                MPI_COMM_WORLD, &recv_requests[d * 2 + 1]);
+            */
+        } else {
+            pairs::copy_in_device(recv_next, send_next, nsend[d * 2 + 1] * elem_size * sizeof(real_t));
+        }
+    }
+
+    //MPI_Waitall(ndims * 2, send_requests.data(), MPI_STATUSES_IGNORE);
+    //MPI_Waitall(ndims * 2, recv_requests.data(), MPI_STATUSES_IGNORE);
+}
+
+}
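BlockForest::getBlockConfig above chooses the block grid by enumerating factorizations of the process count and keeping the triple with the smallest summed cross-section area between neighboring blocks. The following standalone Python re-expression of that search is for illustration only (integer division mirrors the uint_t arithmetic; the bx/by/bz factors are fixed to 1 as in the C++ code):

    def block_config(num_processes, nx, ny, nz):
        # Cross-section areas of the global grid in the xy, xz and yz planes.
        ax, ay, az = nx * ny, nx * nz, ny * nz
        best_surf = 2 * (ax + ay + az)
        x = y = z = 1
        for i in range(1, num_processes):
            if num_processes % i != 0:
                continue
            rem_yz = num_processes // i
            for j in range(1, rem_yz):
                if rem_yz % j != 0:
                    continue
                k = rem_yz // j
                surf = ax // i // j + ay // i // k + az // j // k
                if surf < best_surf:
                    x, y, z, best_surf = i, j, k, surf
        return x, y, z

    # For example, block_config(8, 32, 32, 32) returns (2, 2, 2).
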
diff --git a/runtime/domain/block_forest.hpp b/runtime/domain/block_forest.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..e5f4a68745798b2d8ec0a55909b53ac47f85401e
--- /dev/null
+++ b/runtime/domain/block_forest.hpp
@@ -0,0 +1,58 @@
+#include <map>
+//---
+#include "../pairs_common.hpp"
+#include "domain_partitioning.hpp"
+
+#pragma once
+
+#define SMALL 0.00001
+
+namespace pairs {
+
+class BlockForest : public DomainPartitioner {
+private:
+    std::shared_ptr<BlockForest> forest;
+    std::map<uint_t, std::vector<math::AABB>> neighborhood;
+    std::map<uint_t, std::vector<BlockID>> blocks_pushed;
+    std::vector<int> ranks;
+    std::vector<int> naabbs;
+    std::vector<double> aabbs;
+    real_t *subdom_min;
+    real_t *subdom_max;
+    int world_size, rank, total_aabbs;
+
+public:
+    BlockForest(
+        real_t xmin, real_t xmax, real_t ymin, real_t ymax, real_t zmin, real_t zmax) :
+        DomainPartitioner(xmin, xmax, ymin, ymax, zmin, zmax) {
+
+        subdom_min = new real_t[ndims];
+        subdom_max = new real_t[ndims];
+    }
+
+    ~BlockForest() {
+        delete[] subdom_min;
+        delete[] subdom_max;
+    }
+
+    void setConfig();
+    void setBoundingBox();
+    void initialize(int *argc, char ***argv);
+    void finalize();
+    int getWorldSize() const { return world_size; }
+    int getRank() const { return rank; }
+    bool isWithinSubdomain(real_t x, real_t y, real_t z);
+    void fillArrays(int *neighbor_ranks, int *pbc, real_t *subdom);
+    void communicateSizes(int dim, const int *send_sizes, int *recv_sizes);
+    void communicateData(
+        int dim, int elem_size,
+        const real_t *send_buf, const int *send_offsets, const int *nsend,
+        real_t *recv_buf, const int *recv_offsets, const int *nrecv);
+
+    void communicateAllData(
+        int ndims, int elem_size,
+        const real_t *send_buf, const int *send_offsets, const int *nsend,
+        real_t *recv_buf, const int *recv_offsets, const int *nrecv);
+};
+
+}
diff --git a/runtime/pairs.hpp b/runtime/pairs.hpp
index 8944dfda738602867d7a9fb768cf81fd7e083d90..8eeb50f40d6ad13226b713598bfdffe8f8ac1228 100644
--- a/runtime/pairs.hpp
+++ b/runtime/pairs.hpp
@@ -13,6 +13,7 @@
 #include "runtime_var.hpp"
 #include "timers.hpp"
 #include "devices/device.hpp"
+#include "domain/block_forest.hpp"
 #include "domain/regular_6d_stencil.hpp"
 
 #pragma once
diff --git a/src/pairs/__init__.py b/src/pairs/__init__.py
index e89e0a79cbb8f3e39143dc3461a9734a6dcf8d7b..3099350a0320f56b9b5a6401565415c09f32cb75 100644
--- a/src/pairs/__init__.py
+++ b/src/pairs/__init__.py
@@ -65,3 +65,6 @@ def regular_domain_partitioner():
 
 def regular_domain_partitioner_xy():
     return DomainPartitioners.RegularXY
+
+def block_forest():
+    return DomainPartitioners.BlockForest
diff --git a/src/pairs/sim/domain.py b/src/pairs/sim/domain.py
index a30635397da67196f6d72b8c337d42e7a0062779..8126b58fea395785fcf5e486ca9573afa0d8049d 100644
--- a/src/pairs/sim/domain.py
+++ b/src/pairs/sim/domain.py
@@ -1,6 +1,4 @@
 from pairs.ir.block import pairs_inline
-from pairs.ir.functions import Call_Void
-from pairs.ir.types import Types
 from pairs.sim.lowerable import Lowerable
 
 
@@ -10,8 +8,4 @@ class InitializeDomain(Lowerable):
 
     @pairs_inline
     def lower(self):
-        dom_part = self.sim.domain_partitioning()
-        grid_array = [(self.sim.grid.min(d), self.sim.grid.max(d)) for d in range(self.sim.ndims())]
-        Call_Void(self.sim, "pairs->initDomain", [param for delim in grid_array for param in delim]),
-        Call_Void(self.sim, "pairs->fillCommunicationArrays", [dom_part.neighbor_ranks, dom_part.pbc, dom_part.subdom])
-
+        self.sim.domain_partitioning().initialize()
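With InitializeDomain reduced to a single call, whichever partitioner the simulation selects has to offer the same small interface. The informal summary below is not a base class that exists in the source; the method names are taken from the DimensionRanges and BlockForest classes in domain_partitioning.py further down, and the yield shape follows BlockForest.ghost_particles:

    class DomainPartitioningInterface:
        def initialize(self):
            # emits the pairs->initDomain and pairs->fillCommunicationArrays runtime calls
            raise NotImplementedError
        def number_of_steps(self):
            # number of communication steps per exchange (a single step for BlockForest)
            raise NotImplementedError
        def step_indexes(self, step):
            # buffer slots written during a given step
            raise NotImplementedError
        def ghost_particles(self, step, position, offset=0.0):
            # generator yielding (particle index, slot, target rank, pbc shifts)
            raise NotImplementedError
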
diff --git a/src/pairs/sim/domain_partitioners.py b/src/pairs/sim/domain_partitioners.py
index 6e00ad848f8496fbf8c24aa36ac3a06e813dcc07..b2e6fe28f5ac69539af78ade8800d3395915d095 100644
--- a/src/pairs/sim/domain_partitioners.py
+++ b/src/pairs/sim/domain_partitioners.py
@@ -2,10 +2,10 @@ class DomainPartitioners:
     Invalid = -1
     Regular = 0
     RegularXY = 1
-    BoxList = 2
+    BlockForest = 2
 
     def c_keyword(layout):
-        return "Regular" if layout == DomainPartitioners.Regular else \
-               "RegularXY" if layout == DomainPartitioners.RegularXY else \
-               "BoxList" if layout == DomainPartitioners.BoxList else \
+        return "Regular" if layout == DomainPartitioners.Regular else \
+               "RegularXY" if layout == DomainPartitioners.RegularXY else \
+               "BlockForest" if layout == DomainPartitioners.BlockForest else \
                "Invalid"
diff --git a/src/pairs/sim/domain_partitioning.py b/src/pairs/sim/domain_partitioning.py
index 901df44744426921cb896e9c2781caff73c5521d..b4198e02c7084d1f9fcfaf30c4b74b38ee838bd4 100644
--- a/src/pairs/sim/domain_partitioning.py
+++ b/src/pairs/sim/domain_partitioning.py
@@ -1,5 +1,6 @@
 from pairs.ir.block import pairs_device_block, pairs_host_block
 from pairs.ir.branches import Branch, Filter
+from pairs.ir.functions import Call_Void
 from pairs.ir.loops import For, ParticleFor
 from pairs.ir.utils import Print
 from pairs.ir.scalars import ScalarOp
@@ -28,6 +29,11 @@ class DimensionRanges:
     def step_indexes(self, step):
         return [step * 2 + 0, step * 2 + 1]
 
+    def initialize(self):
+        grid_array = [(self.sim.grid.min(d), self.sim.grid.max(d)) for d in range(self.sim.ndims())]
+        Call_Void(self.sim, "pairs->initDomain", [param for delim in grid_array for param in delim]),
+        Call_Void(self.sim, "pairs->fillCommunicationArrays", [self.neighbor_ranks, self.pbc, self.subdom])
+
     def ghost_particles(self, step, position, offset=0.0):
         # Particles with one of the following flags are ignored
         flags_to_exclude = (Flags.Infinite | Flags.Global)
@@ -44,7 +50,6 @@ class DimensionRanges:
 
     def prev_neighbor(self, j, step, position, offset, flags_to_exclude):
         particle_flags = self.sim.particle_flags
-        j = step * 2 + 1
         for i in For(self.sim, 0, self.sim.nlocal + self.sim.nghost):
             for _ in Filter(self.sim, ScalarOp.cmp(particle_flags[i] & flags_to_exclude, 0)):
                 for _ in Filter(self.sim, position[i][step] > self.subdom[j] - offset):
@@ -63,3 +68,58 @@ class DimensionRanges:
         j = step * 2 + 1
         for _ in Filter(self.sim, ScalarOp.inline(ScalarOp.cmp(self.pbc[j], 0))):
             yield from prev_neighbor(self, j, step, position, offset, flags_to_exclude)
+
+
+class BlockForest:
+    def __init__(self, sim):
+        self.sim = sim
+        self.nranks = sim.add_var('nranks', Types.Int32)
+        self.nranks_capacity = sim.add_var('nranks_capacity', Types.Int32)
+        self.aabb_capacity = sim.add_var('aabb_capacity', Types.Int32)
+        self.ranks = sim.add_static_array('ranks', [self.nranks_capacity], Types.Int32)
+        self.naabbs = sim.add_static_array('naabbs', [self.nranks_capacity], Types.Int32)
+        self.offsets = sim.add_static_array('rank_offsets', [self.nranks_capacity], Types.Int32)
+        self.pbc = sim.add_static_array('pbc', [self.aabb_capacity, 3], Types.Int32)
+        self.aabbs = sim.add_static_array('aabbs', [self.aabb_capacity, 6], Types.Real)
+        self.subdom = sim.add_static_array('subdom', [sim.ndims() * 2], Types.Real)
+
+    def min(self, dim):
+        return self.subdom[dim * 2 + 0]
+
+    def max(self, dim):
+        return self.subdom[dim * 2 + 1]
+
+    def number_of_steps(self):
+        return 1
+
+    def step_indexes(self, step):
+        return [step]
+
+    def initialize(self):
+        grid_array = [(self.sim.grid.min(d), self.sim.grid.max(d)) for d in range(self.sim.ndims())]
+        Call_Void(self.sim, "pairs->initDomain", [param for delim in grid_array for param in delim]),
+        Call_Void(self.sim, "pairs->fillCommunicationArrays", [self.ranks, self.pbc, self.subdom])
+
+    def ghost_particles(self, step, position, offset=0.0):
+        # Particles with one of the following flags are ignored
+        flags_to_exclude = (Flags.Infinite | Flags.Global)
+
+        for i in For(self.sim, 0, self.sim.nlocal):
+            particle_flags = self.sim.particle_flags
+
+            for _ in Filter(self.sim, ScalarOp.cmp(particle_flags[i] & flags_to_exclude, 0)):
+                for r in For(self.sim, 0, self.nranks):
+                    for aabb_id in For(self.sim, self.offsets[r], self.offsets[r] + self.naabbs[r]):
+                        full_cond = None
+
+                        for d in range(self.sim.ndims()):
+                            d_cond = ScalarOp.and_op(
+                                position[i][d] > self.aabbs[aabb_id][d * 2 + 0] + offset,
+                                position[i][d] < self.aabbs[aabb_id][d * 2 + 1] - offset)
+
+                            full_cond = d_cond if full_cond is None else \
+                                        ScalarOp.and_op(full_cond, d_cond)
+
+                        for _ in Filter(self.sim, full_cond):
+                            pbc_shifts = [self.pbc[aabb_id][d] for d in range(self.sim.ndims())]
+                            yield i, r, self.ranks[r], pbc_shifts
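BlockForest.ghost_particles above walks the same rank/AABB tables that BlockForest::updateNeighborhood fills on the runtime side: rank slot r owns naabbs[r] axis-aligned boxes starting at offsets[r], six reals per box (indexed as aabbs[aabb_id][0..5] on the DSL side). A small host-side Python sketch of that CSR-style layout, purely illustrative and assuming flat plain-Python lists rather than the generated arrays:

    def neighbor_aabbs(ranks, naabbs, offsets, aabbs):
        # ranks[r]   -> MPI rank of neighbor slot r
        # naabbs[r]  -> number of AABBs communicated for that slot
        # offsets[r] -> index of the slot's first AABB
        # aabbs      -> flat list, 6 reals (xmin, xmax, ymin, ymax, zmin, zmax) per AABB
        for r, rank in enumerate(ranks):
            for a in range(offsets[r], offsets[r] + naabbs[r]):
                xmin, xmax, ymin, ymax, zmin, zmax = aabbs[6 * a : 6 * a + 6]
                yield rank, (xmin, xmax, ymin, ymax, zmin, zmax)
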
diff --git a/src/pairs/sim/simulation.py b/src/pairs/sim/simulation.py
index 291f085666fdb6cdac4e4f3d3b3a28969ab76c78..defaf998a2a04c12d244c3e6f6e1b47fce0e9ac4 100644
--- a/src/pairs/sim/simulation.py
+++ b/src/pairs/sim/simulation.py
@@ -19,7 +19,7 @@ from pairs.sim.copper_fcc_lattice import CopperFCCLattice
 from pairs.sim.dem_sc_grid import DEMSCGrid
 from pairs.sim.domain import InitializeDomain
 from pairs.sim.domain_partitioners import DomainPartitioners
-from pairs.sim.domain_partitioning import DimensionRanges
+from pairs.sim.domain_partitioning import BlockForest, DimensionRanges
 from pairs.sim.features import AllocateFeatureProperties
 from pairs.sim.grid import Grid2D, Grid3D
 from pairs.sim.instrumentation import RegisterMarkers, RegisterTimers
@@ -107,6 +107,9 @@ class Simulation:
         if partitioner in (DomainPartitioners.Regular, DomainPartitioners.RegularXY):
             self._dom_part = DimensionRanges(self)
 
+        elif partitioner == DomainPartitioners.BlockForest:
+            self._dom_part = BlockForest(self)
+
         else:
             raise Exception("Invalid domain partitioner.")
 