diff --git a/runtime/domain/regular_6d_stencil.cpp b/runtime/domain/regular_6d_stencil.cpp index 4e7631f071d65a8197f98d54789eee3f54bca4f4..f8f2472877b8383fbfe986104ac2df479f1c412e 100644 --- a/runtime/domain/regular_6d_stencil.cpp +++ b/runtime/domain/regular_6d_stencil.cpp @@ -118,7 +118,6 @@ void Regular6DStencil::communicateData( int dim, int elem_size, const real_t *send_buf, const int *send_offsets, const int *nsend, real_t *recv_buf, const int *recv_offsets, const int *nrecv) { - //std::cout << "communicateData" << std::endl; const real_t *send_prev = &send_buf[send_offsets[dim * 2 + 0] * elem_size]; const real_t *send_next = &send_buf[send_offsets[dim * 2 + 1] * elem_size]; @@ -126,10 +125,6 @@ void Regular6DStencil::communicateData( real_t *recv_next = &recv_buf[recv_offsets[dim * 2 + 1] * elem_size]; if(prev[dim] != rank) { - //std::cout << rank << ": send " << nsend[dim * 2 + 0] << " elems to " << prev[dim] << ", recv " << nrecv[dim * 2 + 0] << " elems from " << next[dim] << std::endl; - //MPI_Send(send_prev, nsend[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0, MPI_COMM_WORLD); - //MPI_Recv(recv_prev, nrecv[dim * 2 + 0] * elem_size, MPI_DOUBLE, next[dim], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - MPI_Sendrecv( send_prev, nsend[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0, recv_prev, nrecv[dim * 2 + 0] * elem_size, MPI_DOUBLE, next[dim], 0, @@ -142,10 +137,6 @@ void Regular6DStencil::communicateData( } if(next[dim] != rank) { - //std::cout << rank << ": send " << nsend[dim * 2 + 1] << " elems to " << next[dim] << ", recv " << nrecv[dim * 2 + 1] << " elems from " << prev[dim] << std::endl; - //MPI_Send(send_next, nsend[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0, MPI_COMM_WORLD); - //MPI_Recv(recv_next, nrecv[dim * 2 + 1] * elem_size, MPI_DOUBLE, prev[dim], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - MPI_Sendrecv( send_next, nsend[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0, recv_next, nrecv[dim * 2 + 1] * elem_size, MPI_DOUBLE, prev[dim], 0, @@ -158,4 +149,41 @@ void Regular6DStencil::communicateData( } } +void Regular6DStencil::communicateAllData( + int ndims, int elem_size, + const real_t *send_buf, const int *send_offsets, const int *nsend, + real_t *recv_buf, const int *recv_offsets, const int *nrecv) { + + for(int d = 0; d < ndims; d++) { + const real_t *send_prev = &send_buf[send_offsets[d * 2 + 0] * elem_size]; + const real_t *send_next = &send_buf[send_offsets[d * 2 + 1] * elem_size]; + real_t *recv_prev = &recv_buf[recv_offsets[d * 2 + 0] * elem_size]; + real_t *recv_next = &recv_buf[recv_offsets[d * 2 + 1] * elem_size]; + + if(prev[d] != rank) { + MPI_Sendrecv( + send_prev, nsend[d * 2 + 0] * elem_size, MPI_DOUBLE, prev[d], 0, + recv_prev, nrecv[d * 2 + 0] * elem_size, MPI_DOUBLE, next[d], 0, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); + + } else { + for(int i = 0; i < nsend[d * 2 + 0] * elem_size; i++) { + recv_prev[i] = send_prev[i]; + } + } + + if(next[d] != rank) { + MPI_Sendrecv( + send_next, nsend[d * 2 + 1] * elem_size, MPI_DOUBLE, next[d], 0, + recv_next, nrecv[d * 2 + 1] * elem_size, MPI_DOUBLE, prev[d], 0, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); + + } else { + for(int i = 0; i < nsend[d * 2 + 1] * elem_size; i++) { + recv_next[i] = send_next[i]; + } + } + } +} + } diff --git a/runtime/domain/regular_6d_stencil.hpp b/runtime/domain/regular_6d_stencil.hpp index 1c12ff45de9a2a1a69b1067cdc42d4c2e995c338..8a43b2d4954bc1fae4eff570a2972a8484dcf461 100644 --- a/runtime/domain/regular_6d_stencil.hpp +++ b/runtime/domain/regular_6d_stencil.hpp @@ -54,6 +54,11 @@ public: int dim, int elem_size, const real_t *send_buf, const int *send_offsets, const int *nsend, real_t *recv_buf, const int *recv_offsets, const int *nrecv); + + void communicateAllData( + int ndims, int elem_size, + const real_t *send_buf, const int *send_offsets, const int *nsend, + real_t *recv_buf, const int *recv_offsets, const int *nrecv); }; } diff --git a/runtime/pairs.cpp b/runtime/pairs.cpp index 9ec1f23a2bfc64eb2665d99a7625bcd731d9618a..93c714c3fc99a619fc28dca42213fda086c3e43d 100644 --- a/runtime/pairs.cpp +++ b/runtime/pairs.cpp @@ -305,7 +305,6 @@ void PairsSimulation::communicateSizes(int dim, const int *send_sizes, int *recv array_flags->setHostFlag(nrecv_id); array_flags->clearDeviceFlag(nrecv_id); this->getDomainPartitioner()->communicateSizes(dim, send_sizes, recv_sizes); - //PAIRS_DEBUG("send_sizes=[%d, %d], recv_sizes=[%d, %d]\n", send_sizes[dim * 2 + 0], send_sizes[dim * 2 + 1], recv_sizes[dim * 2 + 0], recv_sizes[dim * 2 + 1]); } void PairsSimulation::communicateData( @@ -342,59 +341,42 @@ void PairsSimulation::communicateData( dim, elem_size, send_buf, send_offsets, nsend, recv_buf, recv_offsets, nrecv); copyArrayToDevice(recv_buf_id, Ignore, nrecv_all * elem_size * sizeof(real_t)); +} - /* - // Debug messages - const int elems_to_print = 5; - - // Send buffer debug - for(int i = 0; i < 2; i++) { - int nsnd = nsend[dim * 2 + i]; - - PAIRS_DEBUG("send_buf=["); - for(int j = 0; j < MIN(elems_to_print, nsnd); j++) { - for(int k = 0; k < elem_size; k++) { - PAIRS_DEBUG("%f,", send_buf[(send_offsets[dim * 2 + i] + j) * elem_size + k]); - } - } +void PairsSimulation::communicateAllData( + int ndims, int elem_size, + const real_t *send_buf, const int *send_offsets, const int *nsend, + real_t *recv_buf, const int *recv_offsets, const int *nrecv) { - if(elems_to_print * 2 < nsnd) { - PAIRS_DEBUG("\b ... "); - } + auto send_buf_id = getArrayByHostPointer(send_buf).getId(); + auto recv_buf_id = getArrayByHostPointer(recv_buf).getId(); + auto send_offsets_id = getArrayByHostPointer(send_offsets).getId(); + auto recv_offsets_id = getArrayByHostPointer(recv_offsets).getId(); + auto nsend_id = getArrayByHostPointer(nsend).getId(); + auto nrecv_id = getArrayByHostPointer(nrecv).getId(); - for(int j = MAX(elems_to_print, nsnd - elems_to_print); j < nsnd; j++) { - for(int k = 0; k < elem_size; k++) { - PAIRS_DEBUG("%f,", send_buf[(send_offsets[dim * 2 + i] + j) * elem_size + k]); - } - } + copyArrayToHost(send_offsets_id, ReadOnly); + copyArrayToHost(recv_offsets_id, ReadOnly); + copyArrayToHost(nsend_id, ReadOnly); + copyArrayToHost(nrecv_id, ReadOnly); - PAIRS_DEBUG("\b]\n"); + int nsend_all = 0; + int nrecv_all = 0; + for(int d = 0; d <= ndims; d++) { + nsend_all += nsend[d * 2 + 0]; + nsend_all += nsend[d * 2 + 1]; + nrecv_all += nrecv[d * 2 + 0]; + nrecv_all += nrecv[d * 2 + 1]; } - // Receive buffer debug - for(int i = 0; i < 2; i++) { - int nrec = nrecv[dim * 2 + i]; - - PAIRS_DEBUG("recv_buf=["); - for(int j = 0; j < MIN(elems_to_print, nrec); j++) { - for(int k = 0; k < elem_size; k++) { - PAIRS_DEBUG("%f,", recv_buf[(recv_offsets[dim * 2 + i] + j) * elem_size + k]); - } - } - - if(elems_to_print * 2 < nrec) { - PAIRS_DEBUG("\b ... "); - } + copyArrayToHost(send_buf_id, Ignore, nsend_all * elem_size * sizeof(real_t)); - for(int j = MAX(elems_to_print, nrec - elems_to_print); j < nrec; j++) { - for(int k = 0; k < elem_size; k++) { - PAIRS_DEBUG("%f,", recv_buf[(recv_offsets[dim * 2 + i] + j) * elem_size + k]); - } - } + array_flags->setHostFlag(recv_buf_id); + array_flags->clearDeviceFlag(recv_buf_id); + this->getDomainPartitioner()->communicateAllData( + ndims, elem_size, send_buf, send_offsets, nsend, recv_buf, recv_offsets, nrecv); - PAIRS_DEBUG("\b]\n"); - } - */ + copyArrayToDevice(recv_buf_id, Ignore, nrecv_all * elem_size * sizeof(real_t)); } void PairsSimulation::fillCommunicationArrays(int *neighbor_ranks, int *pbc, real_t *subdom) { diff --git a/runtime/pairs.hpp b/runtime/pairs.hpp index ba987e2ad510c6c67e164e7bae4349dc8120cdba..0cfbefb78e2d3d5fc73513c18fd889a80c36a0bf 100644 --- a/runtime/pairs.hpp +++ b/runtime/pairs.hpp @@ -229,11 +229,17 @@ public: Regular6DStencil *getDomainPartitioner() { return dom_part; } void communicateSizes(int dim, const int *send_sizes, int *recv_sizes); + void communicateData( int dim, int elem_size, const real_t *send_buf, const int *send_offsets, const int *nsend, real_t *recv_buf, const int *recv_offsets, const int *nrecv); + void communicateAllData( + int ndims, int elem_size, + const real_t *send_buf, const int *send_offsets, const int *nsend, + real_t *recv_buf, const int *recv_offsets, const int *nrecv); + void fillCommunicationArrays(int neighbor_ranks[], int pbc[], real_t subdom[]); // Device functions diff --git a/src/pairs/sim/comm.py b/src/pairs/sim/comm.py index 242c887531792b81385762f94a0421563aaa7ce1..3f552ed8735e36922a8be2cb701642d9187e3eb7 100644 --- a/src/pairs/sim/comm.py +++ b/src/pairs/sim/comm.py @@ -41,18 +41,31 @@ class Comm: def synchronize(self): # Every property that is not constant across timesteps and have neighbor accesses during any # interaction kernel (i.e. property[j] in force calculation kernel) - prop_list = [self.sim.property(p) for p in ['position', 'linear_velocity', 'angular_velocity'] if self.sim.property(p) is not None] - for step in range(self.dom_part.number_of_steps()): - PackGhostParticles(self, step, prop_list) - CommunicateData(self, step, prop_list) - UnpackGhostParticles(self, step, prop_list) + prop_names = ['position', 'linear_velocity', 'angular_velocity'] + prop_list = [ self.sim.property(p) for p in prop_names if self.sim.property(p) is not None] + + PackAllGhostParticles(self, prop_list) + CommunicateAllData(self, prop_list) + UnpackAllGhostParticles(self, prop_list) @pairs_inline def borders(self): # Every property that has neighbor accesses during any interaction kernel (i.e. property[j] # exists in any force calculation kernel) - # We ignore normal because there should be no halfspace ghosts - prop_list = [self.sim.property(p) for p in ['uid', 'type', 'mass', 'radius', 'position', 'linear_velocity', 'angular_velocity', 'shape'] if self.sim.property(p) is not None] + # We ignore normal because there should be no ghost half-spaces + prop_names = [ + 'uid', + 'type', + 'mass', + 'radius', + 'position', + 'linear_velocity', + 'angular_velocity', + 'shape' + ] + + prop_list = [self.sim.property(p) for p in prop_names if self.sim.property(p) is not None] + Assign(self.sim, self.nsend_all, 0) Assign(self.sim, self.sim.nghost, 0) @@ -75,12 +88,15 @@ class Comm: PackGhostParticles(self, step, prop_list) CommunicateData(self, step, prop_list) UnpackGhostParticles(self, step, prop_list) - Assign(self.sim, self.sim.nghost, self.sim.nghost + sum([self.nrecv[j] for j in self.dom_part.step_indexes(step)])) + + step_nrecv = sum([self.nrecv[j] for j in self.dom_part.step_indexes(step)]) + Assign(self.sim, self.sim.nghost, self.sim.nghost + step_nrecv) @pairs_inline def exchange(self): # Every property except volatiles prop_list = self.sim.properties.non_volatiles() + for step in range(self.dom_part.number_of_steps()): Assign(self.sim, self.nsend_all, 0) Assign(self.sim, self.sim.nghost, 0) @@ -130,9 +146,31 @@ class CommunicateData(Lowerable): @pairs_inline def lower(self): elem_size = sum([Types.number_of_elements(self.sim, p.type()) for p in self.prop_list]) - Call_Void(self.sim, "pairs->communicateData", [self.step, elem_size, - self.comm.send_buffer, self.comm.send_offsets, self.comm.nsend, - self.comm.recv_buffer, self.comm.recv_offsets, self.comm.nrecv]) + + Call_Void(self.sim, + "pairs->communicateData", + [self.step, elem_size, + self.comm.send_buffer, self.comm.send_offsets, self.comm.nsend, + self.comm.recv_buffer, self.comm.recv_offsets, self.comm.nrecv]) + + +class CommunicateAllData(Lowerable): + def __init__(self, comm, prop_list): + super().__init__(comm.sim) + self.comm = comm + self.prop_list = prop_list + self.sim.add_statement(self) + + @pairs_inline + def lower(self): + elem_size = sum([Types.number_of_elements(self.sim, p.type()) for p in self.prop_list]) + + Call_Void( + self.sim, + "pairs->communicateAllData", + [self.comm.dom_part.number_of_steps(), elem_size, + self.comm.send_buffer, self.comm.send_offsets, self.comm.nsend, + self.comm.recv_buffer, self.comm.recv_offsets, self.comm.nrecv]) class DetermineGhostParticles(Lowerable): @@ -281,6 +319,82 @@ class UnpackGhostParticles(Lowerable): p_offset += 1 +class PackAllGhostParticles(Lowerable): + def __init__(self, comm, prop_list): + super().__init__(comm.sim) + self.comm = comm + self.prop_list = prop_list + self.sim.add_statement(self) + + def get_elems_per_particle(self): + return sum([Types.number_of_elements(self.sim, p.type()) for p in self.prop_list]) + + @pairs_device_block + def lower(self): + send_buffer = self.comm.send_buffer + send_buffer.set_stride(1, self.get_elems_per_particle()) + send_map = self.comm.send_map + send_mult = self.comm.send_mult + self.sim.module_name(f"pack_all_ghost_particles" + "_".join([str(p.id()) for p in self.prop_list])) + + for i in For(self.sim, 0, self.comm.nsend_all): + p_offset = 0 + m = send_map[i] + for p in self.prop_list: + if not Types.is_scalar(p.type()): + nelems = Types.number_of_elements(self.sim, p.type()) + for e in range(nelems): + src = p[m][e] + if p == self.sim.position(): + src += send_mult[i][e] * self.sim.grid.length(e) + + Assign(self.sim, send_buffer[i][p_offset + e], src) + + p_offset += nelems + + else: + cast_fn = lambda x: Cast(self.sim, x, Types.Real) if p.type() != Types.Real else x + Assign(self.sim, send_buffer[i][p_offset], cast_fn(p[m])) + p_offset += 1 + + +class UnpackAllGhostParticles(Lowerable): + def __init__(self, comm, prop_list): + super().__init__(comm.sim) + self.comm = comm + self.prop_list = prop_list + self.sim.add_statement(self) + + def get_elems_per_particle(self): + return sum([Types.number_of_elements(self.sim, p.type()) for p in self.prop_list]) + + @pairs_device_block + def lower(self): + nlocal = self.sim.nlocal + dom_part = self.comm.dom_part + recv_buffer = self.comm.recv_buffer + recv_buffer.set_stride(1, self.get_elems_per_particle()) + self.sim.module_name(f"unpack_all_ghost_particles" + "_".join([str(p.id()) for p in self.prop_list])) + + nrecv_size = sum([len(dom_part.step_indexes(s)) for s in range(dom_part.number_of_steps())]) + nrecv_all = sum([self.comm.nrecv[j] for j in range(nrecv_size)]) + + for i in For(self.sim, 0, nrecv_all): + p_offset = 0 + for p in self.prop_list: + if not Types.is_scalar(p.type()): + nelems = Types.number_of_elements(self.sim, p.type()) + for e in range(nelems): + Assign(self.sim, p[nlocal + i][e], recv_buffer[i][p_offset + e]) + + p_offset += nelems + + else: + cast_fn = lambda x: Cast(self.sim, x, p.type()) if p.type() != Types.Real else x + Assign(self.sim, p[nlocal + i], cast_fn(recv_buffer[i][p_offset])) + p_offset += 1 + + class RemoveExchangedParticles_part1(Lowerable): def __init__(self, comm): super().__init__(comm.sim)