diff --git a/runtime/boundary_weights.hpp b/runtime/boundary_weights.hpp new file mode 100644 index 0000000000000000000000000000000000000000..165a62da6d6405838bcc27182773b6dd540eefa4 --- /dev/null +++ b/runtime/boundary_weights.hpp @@ -0,0 +1,54 @@ +#include <iostream> +#include <string.h> +#include <fstream> +#include <sstream> +//--- +#include "pairs.hpp" +#include "pairs_common.hpp" +#include "gen/property_interface.hpp" + +#pragma once + +namespace pairs { + +void compute_boundary_weights( + PairsSimulation *ps, + real_t xmin, real_t xmax, real_t ymin, real_t ymax, real_t zmin, real_t zmax, + int *comp_weight, int *comm_weight) { + + const int particle_capacity = ps->getParticleCapacity(); + const int nlocal = ps->getNumberOfLocalParticles(); + const int nghost = ps->getNumberOfGhostParticles(); + auto position_prop = ps->getPropertyByName("position"); + + //ps->copyPropertyToDevice(position_prop, Ignore); + + *comp_weight = 0; + *comm_weight = 0; + + for(int i = 0; i < nlocal; i++) { + real_t pos_x = pairs_interface::getPosition(position_ptr, i, 0, particle_capacity); + real_t pos_y = pairs_interface::getPosition(position_ptr, i, 1, particle_capacity); + real_t pos_z = pairs_interface::getPosition(position_ptr, i, 2, particle_capacity); + + if( pos_x > xmin && pos_x <= xmax && + pos_y > ymin && pos_y <= ymax && + pos_z > zmin && pos_z <= zmax) { + *comp_weight++; + } + } + + for(int i = nlocal; i < nlocal + nghost; i++) { + real_t pos_x = pairs_interface::getPosition(position_ptr, i, 0, particle_capacity); + real_t pos_y = pairs_interface::getPosition(position_ptr, i, 1, particle_capacity); + real_t pos_z = pairs_interface::getPosition(position_ptr, i, 2, particle_capacity); + + if( pos_x > xmin && pos_x <= xmax && + pos_y > ymin && pos_y <= ymax && + pos_z > zmin && pos_z <= zmax) { + *comm_weight++; + } + } +} + +} \ No newline at end of file diff --git a/runtime/domain/block_forest.cpp b/runtime/domain/block_forest.cpp index af30587cedd075b475353630a33060c625fbfdbd..8da11deaa0568af1a9b56c5e476c7072c7456825 100644 --- a/runtime/domain/block_forest.cpp +++ b/runtime/domain/block_forest.cpp @@ -12,6 +12,7 @@ #include <pe/amr/weight_assignment/MetisAssignmentFunctor.h> #include <pe/amr/weight_assignment/WeightAssignmentFunctor.h> //--- +#include "../boundary_weights.hpp" #include "../pairs_common.hpp" #include "../devices/device.hpp" #include "regular_6d_stencil.hpp" @@ -21,6 +22,9 @@ namespace pairs { void BlockForest::updateNeighborhood() { auto me = mpi::MPIManager::instance()->rank(); + ranks.clear(); + naabbs.clear(); + aabbs.clear(); this->nranks = 0; this->total_aabbs = 0; @@ -50,23 +54,19 @@ void BlockForest::updateNeighborhood() { } } - vec_ranks.clear(); - vec_naabbs.clear(); - vec_aabbs.clear(); - for(auto& nbh: neighborhood) { auto rank = nbh.first; auto aabb_list = nbh.second; - vec_ranks.push_back((int) rank); - vec_naabbs.push_back((int) aabb_list.size()); + ranks.push_back((int) rank); + naabbs.push_back((int) aabb_list.size()); for(auto &aabb: aabb_list) { - vec_aabbs.push_back(aabb.xMin()); - vec_aabbs.push_back(aabb.xMax()); - vec_aabbs.push_back(aabb.yMin()); - vec_aabbs.push_back(aabb.yMax()); - vec_aabbs.push_back(aabb.zMin()); - vec_aabbs.push_back(aabb.zMax()); + aabbs.push_back(aabb.xMin()); + aabbs.push_back(aabb.xMax()); + aabbs.push_back(aabb.yMin()); + aabbs.push_back(aabb.yMax()); + aabbs.push_back(aabb.zMin()); + aabbs.push_back(aabb.zMax()); this->total_aabbs++; } @@ -77,7 +77,7 @@ void BlockForest::updateNeighborhood() { void BlockForest::copyRuntimeArray(const std::string& name, void *dest, const int size) { - void *src = name.compare('ranks') ? vec_ranks.data() : + void *src = name.compare('ranks') ? ranks.data() : name.compare('naabbs') ? vec_naabbs.data() : name.compare('rank_offsets') ? offsets : name.compare('pbc') ? vec_pbc.data() : @@ -89,33 +89,7 @@ void BlockForest::copyRuntimeArray(const std::string& name, void *dest, const in std::memcpy(dest, src, size * tsize); } -/* - extern fn md_compute_boundary_weights( - xmin: real_t, xmax: real_t, ymin: real_t, ymax: real_t, zmin: real_t, zmax: real_t, - computational_weight: &mut u32, communication_weight: &mut u32) -> () { - - let grid = grid_; - let particle = make_particle(grid, array_dev, ParticleDataLayout(), null_layout()); - let sum = @|a: i32, b: i32| { a + b }; - let aabb = AABB { - xmin: xmin, - xmax: xmax, - ymin: ymin, - ymax: ymax, - zmin: zmin, - zmax: zmax - }; - - *computational_weight = reduce_i32(grid.nparticles, 0, sum, |i| { - select(is_within_domain(particle.get_position(i), aabb), 1, 0) - }) as u32; - - *communication_weight = reduce_i32(grid.nghost, 0, sum, |i| { - select(is_within_domain(particle.get_position(grid.nparticles + i), aabb), 1, 0) - }) as u32; - */ - -void BlockForest::updateWeights(real_t *position, int nparticles) { +void BlockForest::updateWeights(PairsSimulation *ps, int nparticles) { mpi::BufferSystem bs(mpi::MPIManager::instance()->comm(), 756); info.clear(); @@ -124,7 +98,8 @@ void BlockForest::updateWeights(real_t *position, int nparticles) { auto aabb = block->getAABB(); auto& block_info = info[block->getId()]; - pairs->computeBoundaryWeights( + pairs::compute_boundary_weights( + ps, aabb.xMin(), aabb.xMax(), aabb.yMin(), aabb.yMax(), aabb.zMin(), aabb.zMax(), &(block_info.computationalWeight), &(block_info.communicationWeight)); @@ -133,7 +108,8 @@ void BlockForest::updateWeights(real_t *position, int nparticles) { const auto b_aabb = forest->getAABBFromBlockId(b_id); auto& b_info = info[b_id]; - pairs->computeBoundaryWeights( + pairs::compute_boundary_weights( + ps, b_aabb.xMin(), b_aabb.xMax(), b_aabb.yMin(), b_aabb.yMax(), b_aabb.zMin(), b_aabb.zMax(), &(b_info.computationalWeight), &(b_info.communicationWeight)); } @@ -352,20 +328,19 @@ bool BlockForest::isWithinSubdomain(real_t x, real_t y, real_t z) { return false; } -void BlockForest::communicateSizes(int dim, const int *send_sizes, int *recv_sizes) { - if(prev[dim] != rank) { - MPI_Send(&send_sizes[dim * 2 + 0], 1, MPI_INT, prev[dim], 0, MPI_COMM_WORLD); - MPI_Recv(&recv_sizes[dim * 2 + 0], 1, MPI_INT, next[dim], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - } else { - recv_sizes[dim * 2 + 0] = send_sizes[dim * 2 + 0]; - } +void BlockForest::communicateSizes(int dim, const int *nsend, int *nrecv) { + std::vector<MPI_Request> send_requests(ranks.size(), MPI_REQUEST_NULL); + std::vector<MPI_Request> recv_requests(ranks.size(), MPI_REQUEST_NULL); + size_t nranks = 0; - if(next[dim] != rank) { - MPI_Send(&send_sizes[dim * 2 + 1], 1, MPI_INT, next[dim], 0, MPI_COMM_WORLD); - MPI_Recv(&recv_sizes[dim * 2 + 1], 1, MPI_INT, prev[dim], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - } else { - recv_sizes[dim * 2 + 1] = send_sizes[dim * 2 + 1]; + for(auto neigh_rank: ranks) { + MPI_Irecv(&recv_sizes[i], 1, MPI_INT, neigh_rank, 0, MPI_COMM_WORLD, &nrecv[i], &recv_requests[i]); + MPI_Isend(&send_sizes[i], 1, MPI_INT, neigh_rank, 0, MPI_COMM_WORLD, &nsend[i], &send_requests[i]); + nranks++; } + + MPI_Waitall(nranks, send_requests.data(), MPI_STATUSES_IGNORE); + MPI_Waitall(nranks, recv_requests.data(), MPI_STATUSES_IGNORE); } void BlockForest::communicateData( @@ -373,110 +348,41 @@ void BlockForest::communicateData( const real_t *send_buf, const int *send_offsets, const int *nsend, real_t *recv_buf, const int *recv_offsets, const int *nrecv) { - //MPI_Request recv_requests[2]; - //MPI_Request send_requests[2]; - const real_t *send_prev = &send_buf[send_offsets[dim * 2 + 0] * elem_size]; - const real_t *send_next = &send_buf[send_offsets[dim * 2 + 1] * elem_size]; - real_t *recv_prev = &recv_buf[recv_offsets[dim * 2 + 0] * elem_size]; - real_t *recv_next = &recv_buf[recv_offsets[dim * 2 + 1] * elem_size]; - - if(prev[dim] != rank) { - MPI_Sendrecv( - send_prev, nsend[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0, - recv_prev, nrecv[dim * 2 + 0] * elem_size, MPI_DOUBLE, next[dim], 0, - MPI_COMM_WORLD, MPI_STATUS_IGNORE); - - /* - MPI_Irecv( - recv_prev, nrecv[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0, - MPI_COMM_WORLD, &recv_requests[0]); - - MPI_Isend( - send_prev, nsend[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0, - MPI_COMM_WORLD, &send_requests[0]); - */ - } else { - pairs::copy_in_device(recv_prev, send_prev, nsend[dim * 2 + 0] * elem_size * sizeof(real_t)); - } - - if(next[dim] != rank) { - MPI_Sendrecv( - send_next, nsend[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0, - recv_next, nrecv[dim * 2 + 1] * elem_size, MPI_DOUBLE, prev[dim], 0, - MPI_COMM_WORLD, MPI_STATUS_IGNORE); - - /* - MPI_Irecv( - recv_next, nrecv[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0, - MPI_COMM_WORLD, &recv_requests[1]); - - MPI_Isend( - send_next, nsend[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0, - MPI_COMM_WORLD, &send_requests[1]); - */ - } else { - pairs::copy_in_device(recv_next, send_next, nsend[dim * 2 + 1] * elem_size * sizeof(real_t)); - } - - //MPI_Waitall(2, recv_requests, MPI_STATUSES_IGNORE); - //MPI_Waitall(2, send_requests, MPI_STATUSES_IGNORE); -} - -void BlockForest::communicateAllData( - int ndims, int elem_size, - const real_t *send_buf, const int *send_offsets, const int *nsend, - real_t *recv_buf, const int *recv_offsets, const int *nrecv) { - - //std::vector<MPI_Request> send_requests(ndims * 2, MPI_REQUEST_NULL); - //std::vector<MPI_Request> recv_requests(ndims * 2, MPI_REQUEST_NULL); + std::vector<MPI_Request> send_requests(ranks.size(), MPI_REQUEST_NULL); + std::vector<MPI_Request> recv_requests(ranks.size(), MPI_REQUEST_NULL); + size_t nranks = 0; - for (int d = 0; d < ndims; d++) { - const real_t *send_prev = &send_buf[send_offsets[d * 2 + 0] * elem_size]; - const real_t *send_next = &send_buf[send_offsets[d * 2 + 1] * elem_size]; - real_t *recv_prev = &recv_buf[recv_offsets[d * 2 + 0] * elem_size]; - real_t *recv_next = &recv_buf[recv_offsets[d * 2 + 1] * elem_size]; + for(auto neigh_rank: ranks) { + const real_t *send_ptr = &send_buf[send_offsets[nranks] * elem_size]; + real_t *recv_ptr = &recv_buf[recv_offsets[nranks] * elem_size]; - if (prev[d] != rank) { - MPI_Sendrecv( - send_prev, nsend[d * 2 + 0] * elem_size, MPI_DOUBLE, prev[d], 0, - recv_prev, nrecv[d * 2 + 0] * elem_size, MPI_DOUBLE, next[d], 0, - MPI_COMM_WORLD, MPI_STATUS_IGNORE); + if(neigh_rank != rank) { + MPI_Irecv( + recv_ptr, nrecv[nranks] * elem_size, MPI_DOUBLE, neigh_rank, 0, + MPI_COMM_WORLD, &recv_requests[0]); - /* MPI_Isend( - send_prev, nsend[d * 2 + 0] * elem_size, MPI_DOUBLE, prev[d], 0, - MPI_COMM_WORLD, &send_requests[d * 2 + 0]); + send_ptr, nsend[nranks] * elem_size, MPI_DOUBLE, neigh_rank, 0, + MPI_COMM_WORLD, &send_requests[0]); - MPI_Irecv( - recv_prev, nrecv[d * 2 + 0] * elem_size, MPI_DOUBLE, prev[d], 0, - MPI_COMM_WORLD, &recv_requests[d * 2 + 0]); - */ + nranks++; } else { - pairs::copy_in_device(recv_prev, send_prev, nsend[d * 2 + 0] * elem_size * sizeof(real_t)); + pairs::copy_in_device(recv_ptr, send_ptr, nsend[nranks] * elem_size * sizeof(real_t)); } - if (next[d] != rank) { - MPI_Sendrecv( - send_next, nsend[d * 2 + 1] * elem_size, MPI_DOUBLE, next[d], 0, - recv_next, nrecv[d * 2 + 1] * elem_size, MPI_DOUBLE, prev[d], 0, - MPI_COMM_WORLD, MPI_STATUS_IGNORE); + nranks++; + } - /* - MPI_Isend( - send_next, nsend[d * 2 + 1] * elem_size, MPI_DOUBLE, next[d], 0, - MPI_COMM_WORLD, &send_requests[d * 2 + 1]); + MPI_Waitall(nranks, send_requests.data(), MPI_STATUSES_IGNORE); + MPI_Waitall(nranks, recv_requests.data(), MPI_STATUSES_IGNORE); +} - MPI_Irecv( - recv_next, nrecv[d * 2 + 1] * elem_size, MPI_DOUBLE, next[d], 0, - MPI_COMM_WORLD, &recv_requests[d * 2 + 1]); - */ - } else { - pairs::copy_in_device(recv_next, send_next, nsend[d * 2 + 1] * elem_size * sizeof(real_t)); - } - } +void BlockForest::communicateAllData( + int ndims, int elem_size, + const real_t *send_buf, const int *send_offsets, const int *nsend, + real_t *recv_buf, const int *recv_offsets, const int *nrecv) { - //MPI_Waitall(ndims * 2, send_requests.data(), MPI_STATUSES_IGNORE); - //MPI_Waitall(ndims * 2, recv_requests.data(), MPI_STATUSES_IGNORE); + this->communicateData(0, elem_size, send_buf, send_offsets, nsend, recv_buf, recv_offsets, nrecv); } } diff --git a/runtime/pairs.hpp b/runtime/pairs.hpp index 000e6c47383a2a6996de27d89a15a4bc8702b3b7..ca56b6ec6095cef180269a09aa5cbf73f488915d 100644 --- a/runtime/pairs.hpp +++ b/runtime/pairs.hpp @@ -36,13 +36,23 @@ private: std::vector<Array> arrays; DeviceFlags *prop_flags, *contact_prop_flags, *array_flags; Timers<double> *timers; + int *nlocal, *nghost; public: - PairsSimulation(int nprops_, int ncontactprops_, int narrays_, DomainPartitioners dom_part_type_) { + PairsSimulation( + int nprops_, + int ncontactprops_, + int narrays_, + int *nlocal_, + int *nghost_, + DomainPartitioners dom_part_type_) { + dom_part_type = dom_part_type_; prop_flags = new DeviceFlags(nprops_); contact_prop_flags = new DeviceFlags(ncontactprops_); array_flags = new DeviceFlags(narrays_); + nlocal = nlocal_; + nghost = nghost_; timers = new Timers<double>(1e-6); } @@ -60,6 +70,9 @@ public: return RuntimeVar<T>(h_ptr); } + const int getNumberOfLocalParticles() { return *nlocal; } + const int getNumberOfGhostParticles() { return *nghost; } + // Arrays Array &getArray(array_t id); Array &getArrayByName(std::string name); diff --git a/src/pairs/sim/comm.py b/src/pairs/sim/comm.py index 3a439d8048517f2b4c8b0cb5023c1942787d71bd..08fb1438ae62ce34d57b2f76435f33db304e6780 100644 --- a/src/pairs/sim/comm.py +++ b/src/pairs/sim/comm.py @@ -24,23 +24,22 @@ class Comm: self.send_capacity = sim.add_var('send_capacity', Types.Int32, 200000) self.recv_capacity = sim.add_var('recv_capacity', Types.Int32, 200000) self.elem_capacity = sim.add_var('elem_capacity', Types.Int32, 40) - self.neigh_capacity = sim.add_var('neigh_capacity', Types.Int32, 10) - self.nsend = sim.add_array('nsend', [self.neigh_capacity], Types.Int32) - self.send_offsets = sim.add_array('send_offsets', [self.neigh_capacity], Types.Int32) + self.nsend = sim.add_array('nsend', [dom_part.nranks_capacity], Types.Int32) + self.send_offsets = sim.add_array('send_offsets', [dom_part.nranks_capacity], Types.Int32) self.send_buffer = sim.add_array('send_buffer', [self.send_capacity, self.elem_capacity], Types.Real, arr_sync=False) self.send_map = sim.add_array('send_map', [self.send_capacity], Types.Int32, arr_sync=False) self.exchg_flag = sim.add_array('exchg_flag', [sim.particle_capacity], Types.Int32, arr_sync=False) self.exchg_copy_to = sim.add_array('exchg_copy_to', [self.send_capacity], Types.Int32, arr_sync=False) self.send_mult = sim.add_array('send_mult', [self.send_capacity, sim.ndims()], Types.Int32) - self.nrecv = sim.add_array('nrecv', [self.neigh_capacity], Types.Int32) - self.recv_offsets = sim.add_array('recv_offsets', [self.neigh_capacity], Types.Int32) + self.nrecv = sim.add_array('nrecv', [dom_part.nranks_capacity], Types.Int32) + self.recv_offsets = sim.add_array('recv_offsets', [dom_part.nranks_capacity], Types.Int32) self.recv_buffer = sim.add_array('recv_buffer', [self.recv_capacity, self.elem_capacity], Types.Real, arr_sync=False) self.recv_map = sim.add_array('recv_map', [self.recv_capacity], Types.Int32) self.recv_mult = sim.add_array('recv_mult', [self.recv_capacity, sim.ndims()], Types.Int32) - self.nsend_contact = sim.add_array('nsend_contact', [self.neigh_capacity], Types.Int32) - self.nrecv_contact = sim.add_array('nrecv_contact', [self.neigh_capacity], Types.Int32) - self.contact_soffsets = sim.add_array('contact_soffsets', [self.neigh_capacity], Types.Int32) - self.contact_roffsets = sim.add_array('contact_roffsets', [self.neigh_capacity], Types.Int32) + self.nsend_contact = sim.add_array('nsend_contact', [dom_part.nranks_capacity], Types.Int32) + self.nrecv_contact = sim.add_array('nrecv_contact', [dom_part.nranks_capacity], Types.Int32) + self.contact_soffsets = sim.add_array('contact_soffsets', [dom_part.nranks_capacity], Types.Int32) + self.contact_roffsets = sim.add_array('contact_roffsets', [dom_part.nranks_capacity], Types.Int32) @pairs_inline def synchronize(self): diff --git a/src/pairs/sim/domain_partitioning.py b/src/pairs/sim/domain_partitioning.py index aae6dfc060d45fb2bac129ca169fe9296fd64cd5..1c88c4c4f750457d79ea5df7ee0fccdfe8317ab5 100644 --- a/src/pairs/sim/domain_partitioning.py +++ b/src/pairs/sim/domain_partitioning.py @@ -12,10 +12,12 @@ from pairs.sim.lowerable import Lowerable class DimensionRanges: def __init__(self, sim): - self.sim = sim - self.neighbor_ranks = sim.add_static_array('neighbor_ranks', [sim.ndims() * 2], Types.Int32) - self.pbc = sim.add_static_array('pbc', [sim.ndims() * 2], Types.Int32) - self.subdom = sim.add_static_array('subdom', [sim.ndims() * 2], Types.Real) + self.sim = sim + self.nranks = 6 + self.nranks_capacity = self.nranks + self.neighbor_ranks = sim.add_static_array('neighbor_ranks', [sim.ndims() * 2], Types.Int32) + self.pbc = sim.add_static_array('pbc', [sim.ndims() * 2], Types.Int32) + self.subdom = sim.add_static_array('subdom', [sim.ndims() * 2], Types.Real) def min(self, dim): return self.subdom[dim * 2 + 0]