diff --git a/runtime/domain/block_forest.cpp b/runtime/domain/block_forest.cpp
index ed6f45da8b8b80164a77646462d90e6830a26f55..cc6ec7fca84610b3fbec214e1d4de8ebcddad4bd 100644
--- a/runtime/domain/block_forest.cpp
+++ b/runtime/domain/block_forest.cpp
@@ -75,40 +75,26 @@ void BlockForest::updateNeighborhood(
         }
     }

+    // TODO: delegate these next lines to functions (getRuntimeInt?)
     *nranks = nranks;
-    if(nranks > *rank_capacity) {
-        // reallocateArray?
-        const int new_capacity = nranks + 10;
-        delete[] ranks;
-        delete[] naabbs;
-        delete[] offsets;
-        ranks = new int[new_capacity];
-        naabbs = new int[new_capacity];
-        offsets = new int[new_capacity];
-        *rank_capacity = new_capacity;
-    }
-
-    if(total_aabbs > *aabb_capacity) {
-        const int new_capacity = total_aabbs + 10;
-        aabbs = new real_t[new_capacity * 6];
-        *aabb_capacity = new_capacity;
-    }
-
-    int offset = 0;
-    for(int i = 0; i < nranks; i++) {
-        ranks[i] = vec_ranks.data()[i];
-        naabbs[i] = vec_naabbs.data()[i];
-        offsets[i] = offset;
-        offset += naabbs[i];
-    }
-
-    for(int i = 0; i < total_aabbs * 6; i++) {
-        aabbs[i] = vec_aabbs.data()[i];
-    }
+    *total_aabbs = total_aabbs;
     ps->copyToDevice(aabbs);
 }

+void BlockForest::copyRuntimeArray(const std::string& name, void *dest, const int size) {
+    void *src = (name == "ranks")        ? static_cast<void *>(vec_ranks.data()) :
+                (name == "naabbs")       ? static_cast<void *>(vec_naabbs.data()) :
+                (name == "rank_offsets") ? static_cast<void *>(offsets) :
+                (name == "pbc")          ? static_cast<void *>(vec_pbc.data()) :
+                (name == "aabbs")        ? static_cast<void *>(vec_aabbs.data()) :
+                (name == "subdom")       ? static_cast<void *>(subdom) : nullptr;
+
+    bool is_real = (name == "aabbs") || (name == "subdom");
+    int tsize = (is_real) ? sizeof(real_t) : sizeof(int);
+    std::memcpy(dest, src, size * tsize);
+}
+
 /*
 extern fn md_compute_boundary_weights(
     xmin: real_t, xmax: real_t, ymin: real_t, ymax: real_t, zmin: real_t, zmax: real_t,
@@ -149,8 +135,7 @@ void BlockForest::updateWeights(
         auto aabb = block->getAABB();
         auto& block_info = info[block->getId()];

-        pairs->callModule(
-            "computeBoundaryWeights",
+        pairs->computeBoundaryWeights(
             aabb.xMin(), aabb.xMax(), aabb.yMin(), aabb.yMax(), aabb.zMin(), aabb.zMax(),
             &(block_info.computationalWeight), &(block_info.communicationWeight));

@@ -159,8 +144,7 @@ void BlockForest::updateWeights(
            const auto b_aabb = forest->getAABBFromBlockId(b_id);
            auto& b_info = info[b_id];

-            pairs->callModule(
-                "computeBoundaryWeights",
+            pairs->computeBoundaryWeights(
                 b_aabb.xMin(), b_aabb.xMax(), b_aabb.yMin(), b_aabb.yMax(), b_aabb.zMin(), b_aabb.zMax(),
                 &(b_info.computationalWeight), &(b_info.communicationWeight));
         }
@@ -261,7 +245,11 @@ void BlockForest::getBlockForestAABB(double (&rank_aabb)[6]) {
     rank_aabb[5] = aabb_union.zMax();
 }

-void BlockForest::setConfig() {
+void BlockForest::initialize(int *argc, char ***argv) {
+    MPI_Init(argc, argv);
+    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
     auto mpiManager = mpi::MPIManager::instance();
     mpiManager->initializeMPI(&argc, &argv);
     mpiManager->useWorldComm();
@@ -278,42 +266,9 @@ void BlockForest::setConfig() {
     auto is_within_domain = bind(isWithinBlockForest, _1, _2, _3, forest);
     auto info = make_shared<blockforest::InfoCollection>();
     getBlockForestAABB(forest, rank_aabb);
-}
-
-void BlockForest::setBoundingBox() {
-    MPI_Comm cartesian;
-    int *myloc = new int[ndims];
-    int *periods = new int[ndims];
-    real_t *rank_length = new real_t[ndims];
-    int reorder = 0;
-    for(int d = 0; d < ndims; d++) {
-        periods[d] = 1;
-        rank_length[d] = (this->grid_max[d] - this->grid_min[d]) / (real_t) nranks[d];
-    }
-
-    MPI_Cart_create(MPI_COMM_WORLD, ndims, nranks, periods, reorder, &cartesian);
-    MPI_Cart_get(cartesian, ndims, nranks, periods, myloc);
-    for(int d = 0; d < ndims; d++) {
-        MPI_Cart_shift(cartesian, d, 1, &(prev[d]), &(next[d]));
-        pbc_prev[d] = (myloc[d] == 0) ? 1 : 0;
-        pbc_next[d] = (myloc[d] == nranks[d] - 1) ? -1 : 0;
-        subdom_min[d] = this->grid_min[d] + rank_length[d] * (real_t)myloc[d];
-        subdom_max[d] = subdom_min[d] + rank_length[d];
-    }
-
-    delete[] myloc;
-    delete[] periods;
-    delete[] rank_length;
-    MPI_Comm_free(&cartesian);
-}
-
-void BlockForest::initialize(int *argc, char ***argv) {
-    MPI_Init(argc, argv);
-    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    this->setConfig();
-    this->setBoundingBox();
+    //subdom_min[d] = this->grid_min[d] + rank_length[d] * (real_t)myloc[d];
+    //subdom_max[d] = subdom_min[d] + rank_length[d];
 }

 void BlockForest::finalize() {
@@ -332,17 +287,6 @@ bool BlockForest::isWithinSubdomain(real_t x, real_t y, real_t z) {
     return false;
 }

-void BlockForest::fillArrays(int *neighbor_ranks, int *pbc, real_t *subdom) {
-    for(int d = 0; d < ndims; d++) {
-        neighbor_ranks[d * 2 + 0] = prev[d];
-        neighbor_ranks[d * 2 + 1] = next[d];
-        pbc[d * 2 + 0] = pbc_prev[d];
-        pbc[d * 2 + 1] = pbc_next[d];
-        subdom[d * 2 + 0] = subdom_min[d];
-        subdom[d * 2 + 1] = subdom_max[d];
-    }
-}
-
 void BlockForest::communicateSizes(int dim, const int *send_sizes, int *recv_sizes) {
     if(prev[dim] != rank) {
         MPI_Send(&send_sizes[dim * 2 + 0], 1, MPI_INT, prev[dim], 0, MPI_COMM_WORLD);
diff --git a/runtime/domain/regular_6d_stencil.cpp b/runtime/domain/regular_6d_stencil.cpp
index b01d2c76dc097391fd0e0204cc291f6ee65442b2..e02f0a3ada55cb5d87e2df465ae21ad535f1978a 100644
--- a/runtime/domain/regular_6d_stencil.cpp
+++ b/runtime/domain/regular_6d_stencil.cpp
@@ -71,7 +71,7 @@ void Regular6DStencil::setBoundingBox() {
         MPI_Cart_shift(cartesian, d, 1, &(prev[d]), &(next[d]));
         pbc_prev[d] = (myloc[d] == 0) ? 1 : 0;
         pbc_next[d] = (myloc[d] == nranks[d] - 1) ? -1 : 0;
-        subdom_min[d] = this->grid_min[d] + rank_length[d] * (real_t)myloc[d];
+        subdom_min[d] = this->grid_min[d] + rank_length[d] * (real_t) myloc[d];
         subdom_max[d] = subdom_min[d] + rank_length[d];
     }

@@ -99,14 +99,25 @@ int Regular6DStencil::isWithinSubdomain(real_t x, real_t y, real_t z) {
            z >= subdom_min[2] && z < subdom_max[2] - SMALL;
 }

-void Regular6DStencil::fillArrays(int *neighbor_ranks, int *pbc, real_t *subdom) {
+void Regular6DStencil::copyRuntimeArray(const std::string& name, void *dest, const int size) {
     for(int d = 0; d < ndims; d++) {
-        neighbor_ranks[d * 2 + 0] = prev[d];
-        neighbor_ranks[d * 2 + 1] = next[d];
-        pbc[d * 2 + 0] = pbc_prev[d];
-        pbc[d * 2 + 1] = pbc_next[d];
-        subdom[d * 2 + 0] = subdom_min[d];
-        subdom[d * 2 + 1] = subdom_max[d];
+        if(name == "neighbor_ranks") {
+            int *neighbor_ranks = static_cast<int *>(dest);
+            neighbor_ranks[d * 2 + 0] = prev[d];
+            neighbor_ranks[d * 2 + 1] = next[d];
+        }
+
+        if(name == "pbc") {
+            int *pbc = static_cast<int *>(dest);
+            pbc[d * 2 + 0] = pbc_prev[d];
+            pbc[d * 2 + 1] = pbc_next[d];
+        }
+
+        if(name == "subdom") {
+            real_t *subdom = static_cast<real_t *>(dest);
+            subdom[d * 2 + 0] = subdom_min[d];
+            subdom[d * 2 + 1] = subdom_max[d];
+        }
     }
 }

@@ -131,8 +142,6 @@ void Regular6DStencil::communicateData(
     const real_t *send_buf, const int *send_offsets, const int *nsend,
     real_t *recv_buf, const int *recv_offsets, const int *nrecv) {

-    //MPI_Request recv_requests[2];
-    //MPI_Request send_requests[2];
     const real_t *send_prev = &send_buf[send_offsets[dim * 2 + 0] * elem_size];
     const real_t *send_next = &send_buf[send_offsets[dim * 2 + 1] * elem_size];
     real_t *recv_prev = &recv_buf[recv_offsets[dim * 2 + 0] * elem_size];
@@ -143,16 +152,6 @@ void Regular6DStencil::communicateData(
             send_prev, nsend[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0,
             recv_prev, nrecv[dim * 2 + 0] * elem_size, MPI_DOUBLE, next[dim], 0,
             MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-
-        /*
-        MPI_Irecv(
-            recv_prev, nrecv[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0,
-            MPI_COMM_WORLD, &recv_requests[0]);
-
-        MPI_Isend(
-            send_prev, nsend[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0,
-            MPI_COMM_WORLD, &send_requests[0]);
-        */
     } else {
         pairs::copy_in_device(recv_prev, send_prev, nsend[dim * 2 + 0] * elem_size * sizeof(real_t));
     }
@@ -162,22 +161,9 @@ void Regular6DStencil::communicateData(
             send_next, nsend[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0,
             recv_next, nrecv[dim * 2 + 1] * elem_size, MPI_DOUBLE, prev[dim], 0,
             MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-
-        /*
-        MPI_Irecv(
-            recv_next, nrecv[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0,
-            MPI_COMM_WORLD, &recv_requests[1]);
-
-        MPI_Isend(
-            send_next, nsend[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0,
-            MPI_COMM_WORLD, &send_requests[1]);
-        */
     } else {
         pairs::copy_in_device(recv_next, send_next, nsend[dim * 2 + 1] * elem_size * sizeof(real_t));
     }
-
-    //MPI_Waitall(2, recv_requests, MPI_STATUSES_IGNORE);
-    //MPI_Waitall(2, send_requests, MPI_STATUSES_IGNORE);
 }

 void Regular6DStencil::communicateAllData(
@@ -185,9 +171,6 @@ void Regular6DStencil::communicateAllData(
     const real_t *send_buf, const int *send_offsets, const int *nsend,
     real_t *recv_buf, const int *recv_offsets, const int *nrecv) {

-    //std::vector<MPI_Request> send_requests(ndims * 2, MPI_REQUEST_NULL);
-    //std::vector<MPI_Request> recv_requests(ndims * 2, MPI_REQUEST_NULL);
-
     for (int d = 0; d < ndims; d++) {
         const real_t *send_prev = &send_buf[send_offsets[d * 2 + 0] * elem_size];
         const real_t *send_next = &send_buf[send_offsets[d * 2 + 1] * elem_size];
@@ -199,16 +182,6 @@ void Regular6DStencil::communicateAllData(
                 send_prev, nsend[d * 2 + 0] * elem_size, MPI_DOUBLE, prev[d], 0,
                 recv_prev, nrecv[d * 2 + 0] * elem_size, MPI_DOUBLE, next[d], 0,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-
-            /*
-            MPI_Isend(
-                send_prev, nsend[d * 2 + 0] * elem_size, MPI_DOUBLE, prev[d], 0,
-                MPI_COMM_WORLD, &send_requests[d * 2 + 0]);
-
-            MPI_Irecv(
-                recv_prev, nrecv[d * 2 + 0] * elem_size, MPI_DOUBLE, prev[d], 0,
-                MPI_COMM_WORLD, &recv_requests[d * 2 + 0]);
-            */
         } else {
             pairs::copy_in_device(recv_prev, send_prev, nsend[d * 2 + 0] * elem_size * sizeof(real_t));
         }
@@ -218,23 +191,10 @@ void Regular6DStencil::communicateAllData(
                 send_next, nsend[d * 2 + 1] * elem_size, MPI_DOUBLE, next[d], 0,
                 recv_next, nrecv[d * 2 + 1] * elem_size, MPI_DOUBLE, prev[d], 0,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-
-            /*
-            MPI_Isend(
-                send_next, nsend[d * 2 + 1] * elem_size, MPI_DOUBLE, next[d], 0,
-                MPI_COMM_WORLD, &send_requests[d * 2 + 1]);
-
-            MPI_Irecv(
-                recv_next, nrecv[d * 2 + 1] * elem_size, MPI_DOUBLE, next[d], 0,
-                MPI_COMM_WORLD, &recv_requests[d * 2 + 1]);
-            */
         } else {
             pairs::copy_in_device(recv_next, send_next, nsend[d * 2 + 1] * elem_size * sizeof(real_t));
         }
     }
-
-    //MPI_Waitall(ndims * 2, send_requests.data(), MPI_STATUSES_IGNORE);
-    //MPI_Waitall(ndims * 2, recv_requests.data(), MPI_STATUSES_IGNORE);
 }

 }
diff --git a/runtime/pairs.cpp b/runtime/pairs.cpp
index f3d56730ab38e04fd4f9ea975643069d78cfe2db..09030f3e77c081e59fef17559b8590311a6d2e2d 100644
--- a/runtime/pairs.cpp
+++ b/runtime/pairs.cpp
@@ -515,8 +515,8 @@ void PairsSimulation::communicateContactHistoryData(
     #endif
 }

-void PairsSimulation::fillCommunicationArrays(int *neighbor_ranks, int *pbc, real_t *subdom) {
-    this->getDomainPartitioner()->fillArrays(neighbor_ranks, pbc, subdom);
+void PairsSimulation::copyRuntimeArray(const std::string& name, void *dest, const int size) {
+    this->getDomainPartitioner()->copyRuntimeArray(name, dest, size);
 }

 }
diff --git a/runtime/pairs.hpp b/runtime/pairs.hpp
index 8eeb50f40d6ad13226b713598bfdffe8f8ac1228..94a49fa0b954fa28f1251c330ccfae87dab918a8 100644
--- a/runtime/pairs.hpp
+++ b/runtime/pairs.hpp
@@ -283,7 +283,8 @@ public:
         const real_t *send_buf, const int *contact_soffsets, const int *nsend_contact,
         real_t *recv_buf, int *contact_roffsets, int *nrecv_contact);

-    void fillCommunicationArrays(int neighbor_ranks[], int pbc[], real_t subdom[]);
+    void copyRuntimeArray(
+        const std::string& name, void *dest, const int size);

     // Device functions
     void sync() { device_synchronize(); }
diff --git a/src/pairs/sim/domain_partitioning.py b/src/pairs/sim/domain_partitioning.py
index b4198e02c7084d1f9fcfaf30c4b74b38ee838bd4..47e91b360ea2d1ad0401352af4bcf24108d2a8df 100644
--- a/src/pairs/sim/domain_partitioning.py
+++ b/src/pairs/sim/domain_partitioning.py
@@ -31,8 +31,10 @@ class DimensionRanges:

     def initialize(self):
         grid_array = [(self.sim.grid.min(d), self.sim.grid.max(d)) for d in range(self.sim.ndims())]
-        Call_Void(self.sim, "pairs->initDomain", [param for delim in grid_array for param in delim]),
-        Call_Void(self.sim, "pairs->fillCommunicationArrays", [self.neighbor_ranks, self.pbc, self.subdom])
+        Call_Void(self.sim, "pairs->initDomain", [param for delim in grid_array for param in delim])
+        Call_Void(self.sim, "pairs->copyRuntimeArray", ['neighbor_ranks', self.neighbor_ranks, self.sim.ndims() * 2])
+        Call_Void(self.sim, "pairs->copyRuntimeArray", ['pbc', self.pbc, self.sim.ndims() * 2])
+        Call_Void(self.sim, "pairs->copyRuntimeArray", ['subdom', self.subdom, self.sim.ndims() * 2])

     def ghost_particles(self, step, position, offset=0.0):
         # Particles with one of the following flags are ignored
@@ -97,8 +99,14 @@ class BlockForest:

     def initialize(self):
         grid_array = [(self.sim.grid.min(d), self.sim.grid.max(d)) for d in range(self.sim.ndims())]
-        Call_Void(self.sim, "pairs->initDomain", [param for delim in grid_array for param in delim]),
-        Call_Void(self.sim, "pairs->fillCommunicationArrays", [self.ranks, self.pbc, self.subdom])
+        Call_Void(self.sim, "pairs->initDomain", [param for delim in grid_array for param in delim])
+        # TODO: Check nranks_capacity and aabb_capacity and resize arrays if needed
+        Call_Void(self.sim, "pairs->copyRuntimeArray", ['ranks', self.ranks, self.nranks_capacity])
+        Call_Void(self.sim, "pairs->copyRuntimeArray", ['naabbs', self.naabbs, self.nranks_capacity])
+        Call_Void(self.sim, "pairs->copyRuntimeArray", ['rank_offsets', self.rank_offsets, self.nranks_capacity])
+        Call_Void(self.sim, "pairs->copyRuntimeArray", ['pbc', self.pbc, self.aabb_capacity * 3])
+        Call_Void(self.sim, "pairs->copyRuntimeArray", ['aabbs', self.aabbs, self.aabb_capacity * 6])
+        Call_Void(self.sim, "pairs->copyRuntimeArray", ['subdom', self.subdom, self.sim.ndims() * 2])

     def ghost_particles(self, step, position, offset=0.0):
         # Particles with one of the following flags are ignored
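Usage note (not part of the patch): a minimal sketch of the host-side calls that the DimensionRanges Call_Void lines above are expected to lower to. The array names ('neighbor_ranks', 'pbc', 'subdom'), the size ndims * 2, and the copyRuntimeArray(name, dest, size) signature come from this diff; the function name, the pairs pointer, the fixed ndims value, and the std::vector buffers are illustrative assumptions only.

    #include <vector>
    #include <string>

    // Hypothetical helper: pulls partition info from an already-initialized simulation.
    // Assumes pairs->initDomain(...) has been called and real_t is defined by the runtime headers.
    void fetch_partition_info(pairs::PairsSimulation *pairs) {
        const int ndims = 3;                            // assumed dimensionality
        std::vector<int> neighbor_ranks(ndims * 2);     // prev/next neighbor rank per dimension
        std::vector<int> pbc(ndims * 2);                // periodic-boundary flags per face
        std::vector<real_t> subdom(ndims * 2);          // local subdomain min/max per dimension

        pairs->copyRuntimeArray("neighbor_ranks", neighbor_ranks.data(), ndims * 2);
        pairs->copyRuntimeArray("pbc", pbc.data(), ndims * 2);
        pairs->copyRuntimeArray("subdom", subdom.data(), ndims * 2);
    }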