diff --git a/runtime/pairs.cpp b/runtime/pairs.cpp index 643dfa4c09bee587feeb8385084f44198ffec76f..478bee8bee9ba81efb9293262bd9e785dd550396 100644 --- a/runtime/pairs.cpp +++ b/runtime/pairs.cpp @@ -377,6 +377,58 @@ void PairsSimulation::communicateAllData( this->getTimers()->stop(DeviceTransfers); } +void PairsSimulation::communicateContactHistoryData( + int dim, int nelems_per_contact, + const real_t *send_buf, const int *contact_soffsets, const int *nsend_contact, + real_t *recv_buf, int *contact_roffsets, int *nrecv_contact) { + + auto send_buf_id = getArrayByHostPointer(send_buf).getId(); + auto recv_buf_id = getArrayByHostPointer(recv_buf).getId(); + auto contact_soffsets_id = getArrayByHostPointer(contact_soffsets).getId(); + auto contact_roffsets_id = getArrayByHostPointer(contact_roffsets).getId(); + auto nsend_contact_id = getArrayByHostPointer(nsend_contact).getId(); + auto nrecv_contact_id = getArrayByHostPointer(nrecv_contact).getId(); + + this->getTimers()->start(DeviceTransfers); + copyArrayToHost(contact_soffsets_id, ReadOnly); + copyArrayToHost(nsend_contact_id, ReadOnly); + + int nsend_all = 0; + for(int d = 0; d <= dim; d++) { + contact_roffsets[d * 2 + 0] = 0; + contact_roffsets[d * 2 + 1] = 0; + nsend_all += nsend_contact[d * 2 + 0]; + nsend_all += nsend_contact[d * 2 + 1]; + } + + copyArrayToHost(send_buf_id, Ignore, nsend_all * sizeof(real_t)); + array_flags->setHostFlag(recv_buf_id); + array_flags->clearDeviceFlag(recv_buf_id); + this->getTimers()->stop(DeviceTransfers); + + this->getTimers()->start(Communication); + this->getDomainPartitioner()->communicateSizes(dim, nsend_contact, nrecv_contact); + + contact_roffsets[dim * 2 + 0] = 0; + contact_roffsets[dim * 2 + 1] = nrecv_contact[dim * 2 + 0]; + + int nrecv_all = 0; + for(int d = 0; d <= dim; d++) { + nrecv_all += nrecv_contact[d * 2 + 0]; + nrecv_all += nrecv_contact[d * 2 + 1]; + } + + this->getDomainPartitioner()->communicateData( + dim, 1, send_buf, contact_soffsets, nsend_contact, recv_buf, contact_roffsets, nrecv_contact); + this->getTimers()->stop(Communication); + + this->getTimers()->start(DeviceTransfers); + copyArrayToDevice(recv_buf_id, Ignore, nrecv_all * sizeof(real_t)); + copyArrayToDevice(contact_roffsets_id, Ignore); + copyArrayToDevice(nrecv_contact_id, Ignore); + this->getTimers()->stop(DeviceTransfers); +} + void PairsSimulation::fillCommunicationArrays(int *neighbor_ranks, int *pbc, real_t *subdom) { this->getDomainPartitioner()->fillArrays(neighbor_ranks, pbc, subdom); } diff --git a/runtime/pairs.hpp b/runtime/pairs.hpp index c99a11a4bb84bda4d9ec7ee889a909b95c90612c..415d63467100f3846dce2dee0fced17b014929ee 100644 --- a/runtime/pairs.hpp +++ b/runtime/pairs.hpp @@ -275,6 +275,11 @@ public: const real_t *send_buf, const int *send_offsets, const int *nsend, real_t *recv_buf, const int *recv_offsets, const int *nrecv); + void communicateContactHistoryData( + int dim, int nelems_per_contact, + const real_t *send_buf, const int *contact_soffsets, const int *nsend_contact, + real_t *recv_buf, int *contact_roffsets, int *nrecv_contact); + void fillCommunicationArrays(int neighbor_ranks[], int pbc[], real_t subdom[]); // Device functions diff --git a/src/pairs/sim/comm.py b/src/pairs/sim/comm.py index c6429e8d3a1b1b4572563a4e301a9226b993a9cd..5bad24e1a65ace67146ca1957bd28719bbd0e851 100644 --- a/src/pairs/sim/comm.py +++ b/src/pairs/sim/comm.py @@ -18,25 +18,29 @@ from pairs.sim.lowerable import Lowerable class Comm: def __init__(self, sim, dom_part): - self.sim = sim - self.dom_part = dom_part - self.nsend_all = sim.add_var('nsend_all', Types.Int32) - self.send_capacity = sim.add_var('send_capacity', Types.Int32, 80000) - self.recv_capacity = sim.add_var('recv_capacity', Types.Int32, 80000) - self.elem_capacity = sim.add_var('elem_capacity', Types.Int32, 40) - self.neigh_capacity = sim.add_var('neigh_capacity', Types.Int32, 10) - self.nsend = sim.add_array('nsend', [self.neigh_capacity], Types.Int32) - self.send_offsets = sim.add_array('send_offsets', [self.neigh_capacity], Types.Int32) - self.send_buffer = sim.add_array('send_buffer', [self.send_capacity, self.elem_capacity], Types.Real, arr_sync=False) - self.send_map = sim.add_array('send_map', [self.send_capacity], Types.Int32, arr_sync=False) - self.exchg_flag = sim.add_array('exchg_flag', [sim.particle_capacity], Types.Int32, arr_sync=False) - self.exchg_copy_to = sim.add_array('exchg_copy_to', [self.send_capacity], Types.Int32, arr_sync=False) - self.send_mult = sim.add_array('send_mult', [self.send_capacity, sim.ndims()], Types.Int32) - self.nrecv = sim.add_array('nrecv', [self.neigh_capacity], Types.Int32) - self.recv_offsets = sim.add_array('recv_offsets', [self.neigh_capacity], Types.Int32) - self.recv_buffer = sim.add_array('recv_buffer', [self.recv_capacity, self.elem_capacity], Types.Real, arr_sync=False) - self.recv_map = sim.add_array('recv_map', [self.recv_capacity], Types.Int32) - self.recv_mult = sim.add_array('recv_mult', [self.recv_capacity, sim.ndims()], Types.Int32) + self.sim = sim + self.dom_part = dom_part + self.nsend_all = sim.add_var('nsend_all', Types.Int32) + self.send_capacity = sim.add_var('send_capacity', Types.Int32, 200000) + self.recv_capacity = sim.add_var('recv_capacity', Types.Int32, 200000) + self.elem_capacity = sim.add_var('elem_capacity', Types.Int32, 40) + self.neigh_capacity = sim.add_var('neigh_capacity', Types.Int32, 10) + self.nsend = sim.add_array('nsend', [self.neigh_capacity], Types.Int32) + self.send_offsets = sim.add_array('send_offsets', [self.neigh_capacity], Types.Int32) + self.send_buffer = sim.add_array('send_buffer', [self.send_capacity, self.elem_capacity], Types.Real, arr_sync=False) + self.send_map = sim.add_array('send_map', [self.send_capacity], Types.Int32, arr_sync=False) + self.exchg_flag = sim.add_array('exchg_flag', [sim.particle_capacity], Types.Int32, arr_sync=False) + self.exchg_copy_to = sim.add_array('exchg_copy_to', [self.send_capacity], Types.Int32, arr_sync=False) + self.send_mult = sim.add_array('send_mult', [self.send_capacity, sim.ndims()], Types.Int32) + self.nrecv = sim.add_array('nrecv', [self.neigh_capacity], Types.Int32) + self.recv_offsets = sim.add_array('recv_offsets', [self.neigh_capacity], Types.Int32) + self.recv_buffer = sim.add_array('recv_buffer', [self.recv_capacity, self.elem_capacity], Types.Real, arr_sync=False) + self.recv_map = sim.add_array('recv_map', [self.recv_capacity], Types.Int32) + self.recv_mult = sim.add_array('recv_mult', [self.recv_capacity, sim.ndims()], Types.Int32) + self.nsend_contact = sim.add_array('nsend_contact', [self.neigh_capacity], Types.Int32) + self.nrecv_contact = sim.add_array('nrecv_contact', [self.neigh_capacity], Types.Int32) + self.contact_soffsets = sim.add_array('contact_soffsets', [self.neigh_capacity], Types.Int32) + self.contact_roffsets = sim.add_array('contact_roffsets', [self.neigh_capacity], Types.Int32) @pairs_inline def synchronize(self): @@ -108,6 +112,10 @@ class Comm: Assign(self.sim, self.nrecv[j], 0) Assign(self.sim, self.send_offsets[j], 0) Assign(self.sim, self.recv_offsets[j], 0) + Assign(self.sim, self.nsend_contact[j], 0) + Assign(self.sim, self.nrecv_contact[j], 0) + Assign(self.sim, self.contact_soffsets[j], 0) + Assign(self.sim, self.contact_soffsets[j], 0) if self.sim._target.is_gpu(): CopyArray(self.sim, self.nsend, Contexts.Device, Actions.Ignore) @@ -134,6 +142,12 @@ class Comm: RemoveExchangedParticles_part2(self, prop_list) CommunicateData(self, step, prop_list) UnpackGhostParticles(self, step, prop_list) + + if self.sim._use_contact_history: + PackContactHistoryData(self, step) + CommunicateContactHistoryData(self, step) + UnpackContactHistoryData(self, step) + ChangeSizeAfterExchange(self, step) @@ -168,6 +182,25 @@ class CommunicateData(Lowerable): self.comm.recv_buffer, self.comm.recv_offsets, self.comm.nrecv]) +class CommunicateContactHistoryData(Lowerable): + def __init__(self, comm, step): + super().__init__(comm.sim) + self.comm = comm + self.step = step + self.sim.add_statement(self) + + @pairs_inline + def lower(self): + nelems_per_contact = sum([Types.number_of_elements(self.sim, cp.type()) \ + for cp in self.sim.contact_properties]) + 1 + + Call_Void(self.sim, + "pairs->communicateContactHistoryData", + [self.step, nelems_per_contact, + self.comm.send_buffer, self.comm.contact_soffsets, self.comm.nsend_contact, + self.comm.recv_buffer, self.comm.contact_roffsets, self.comm.nrecv_contact]) + + class CommunicateAllData(Lowerable): def __init__(self, comm, prop_list): super().__init__(comm.sim) @@ -456,6 +489,20 @@ class RemoveExchangedParticles_part2(Lowerable): else: Assign(self.sim, p[dst], p[src]) + if self.sim._use_contact_history: + contact_lists = self.sim._contact_history.contact_lists + num_contacts = self.sim._contact_history.num_contacts + contact_used = self.sim._contact_history.contact_used + + for k in For(self.sim, 0, num_contacts[src]): + Assign(self.sim, contact_lists[dst][k], contact_lists[src][k]) + Assign(self.sim, contact_used[dst][k], contact_used[src][k]) + + for contact_prop in self.sim.contact_properties: + Assign(self.sim, contact_prop[dst, k], contact_prop[src, k]) + + Assign(self.sim, num_contacts[dst], num_contacts[src]) + Assign(self.sim, self.sim.nlocal, self.sim.nlocal - self.comm.nsend_all) @@ -471,3 +518,133 @@ class ChangeSizeAfterExchange(Lowerable): self.sim.module_name(f"change_size_after_exchange{self.step}") self.sim.check_resize(self.sim.particle_capacity, self.sim.nlocal) Assign(self.sim, self.sim.nlocal, self.sim.nlocal + sum([self.comm.nrecv[j] for j in self.comm.dom_part.step_indexes(self.step)])) + + +class PackContactHistoryData(Lowerable): + def __init__(self, comm, step): + super().__init__(comm.sim) + self.comm = comm + self.step = step + self.sim.add_statement(self) + + @pairs_device_block + def lower(self): + send_buffer = self.comm.send_buffer + send_buffer.set_stride(1, 1) + contact_soffsets = self.comm.contact_soffsets + nsend_contact = self.comm.nsend_contact + contact_lists = self.sim._contact_history.contact_lists + num_contacts = self.sim._contact_history.num_contacts + self.sim.module_name(f"pack_contact_history{self.step}") + nelems_per_contact = sum([Types.number_of_elements(self.sim, cp.type()) \ + for cp in self.sim.contact_properties]) + 1 + + previous_step = None + for step_index in self.comm.dom_part.step_indexes(self.step): + offset = 0 if previous_step is None else \ + contact_soffsets[previous_step] + nsend_contact[previous_step] + + Assign(self.sim, contact_soffsets[step_index], offset) + + start = self.comm.send_offsets[step_index] + nparticles = self.comm.nsend[step_index] + + Assign(self.sim, nsend_contact[step_index], nparticles * 2) + + for i in For(self.sim, start, ScalarOp.inline(start + nparticles)): + buf_i = i - start + m = self.comm.send_map[i] + ncontacts = num_contacts[m] + contact_total_elems = ncontacts * nelems_per_contact + soff = contact_soffsets[step_index] + offset = AtomicAdd(self.sim, self.comm.nsend_contact[step_index], contact_total_elems) + + Assign(self.sim, send_buffer[soff][buf_i], Cast(self.sim, ncontacts, Types.Real)) + Assign(self.sim, send_buffer[soff][buf_i + nparticles], Cast(self.sim, offset, Types.Real)) + + for k in For(self.sim, 0, ncontacts): + disp = k * nelems_per_contact + cp_offset = 1 + + Assign(self.sim, send_buffer[soff][offset + 0], + Cast(self.sim, contact_lists[m][k], Types.Real)) + + for contact_prop in self.sim.contact_properties: + if not Types.is_scalar(contact_prop.type()): + nelems = Types.number_of_elements(self.sim, contact_prop.type()) + for e in range(nelems): + Assign(self.sim, send_buffer[soff][offset + disp + cp_offset + e], + contact_prop[m, k][e]) + + cp_offset += nelems + + else: + cast_fn = lambda x: x if contact_prop.type() == Types.Real else \ + Cast(self.sim, x, Types.Real) + + Assign(self.sim, send_buffer[soff][offset + disp + cp_offset], + cast_fn(contact_prop[m, k])) + cp_offset += 1 + + previous_step = step_index + + +class UnpackContactHistoryData(Lowerable): + def __init__(self, comm, step): + super().__init__(comm.sim) + self.comm = comm + self.step = step + self.sim.add_statement(self) + + @pairs_device_block + def lower(self): + nlocal = self.sim.nlocal + recv_buffer = self.comm.recv_buffer + recv_buffer.set_stride(1, 1) + contact_roffsets = self.comm.contact_roffsets + contact_lists = self.sim._contact_history.contact_lists + num_contacts = self.sim._contact_history.num_contacts + contact_used = self.sim._contact_history.contact_used + self.sim.module_name(f"unpack_contact_history{self.step}") + + step_indexes = self.comm.dom_part.step_indexes(self.step) + start = self.comm.recv_offsets[step_indexes[0]] + nparticles = sum([self.comm.nrecv[j] for j in step_indexes]) + nelems_per_contact = sum([Types.number_of_elements(self.sim, cp.type()) \ + for cp in self.sim.contact_properties]) + 1 + + for step_index in self.comm.dom_part.step_indexes(self.step): + start = self.comm.recv_offsets[step_index] + nparticles = self.comm.nrecv[step_index] + + for i in For(self.sim, start, ScalarOp.inline(start + nparticles)): + buf_i = i - start + roff = contact_roffsets[step_index] + ncontacts = Cast(self.sim, recv_buffer[roff][buf_i], Types.Int32) + offset = Cast(self.sim, recv_buffer[roff][buf_i + nparticles], Types.Int32) + + Assign(self.sim, num_contacts[nlocal + i], ncontacts) + + for k in For(self.sim, 0, ncontacts): + disp = k * nelems_per_contact + cp_offset = 0 + + Assign(self.sim, contact_lists[nlocal + i][k], recv_buffer[roff][offset + 0]) + Assign(self.sim, contact_used[nlocal + i][k], 1) + + for contact_prop in self.sim.contact_properties: + if not Types.is_scalar(contact_prop.type()): + nelems = Types.number_of_elements(self.sim, contact_prop.type()) + for e in range(nelems): + Assign(self.sim, contact_prop[nlocal + i, k][e], + recv_buffer[roff][offset + disp + cp_offset + e]) + + cp_offset += nelems + + else: + cast_fn = lambda x: x if contact_prop.type() == Types.Real else \ + Cast(self.sim, x, contact_prop.type()) + + Assign(self.sim, contact_prop[nlocal + i, k], + cast_fn(recv_buffer[roff][offset + disp + cp_offset])) + cp_offset += 1