Add slice (byte offset + byte count) host<->device copy helpers to the runtime
and switch the DEM example from neighbor lists back to cell lists.

Review notes for this revision of the patch:
  * copy_slice_to_device / copy_slice_to_host keep the const qualifier of the
    source pointer instead of casting it away with a C-style cast.
  * The PAIRS_DEBUG messages print the size_t offset/size with %zu instead of
    %d (assumes PAIRS_DEBUG is printf-style - confirm against its definition).
  * The disabled slice-copy calls in communicateData() no longer scale the
    already-in-bytes sizes a second time.
  * Line breaks, blank context lines and indentation were reconstructed from a
    whitespace-collapsed copy of the patch, and the blob ids on the index lines
    are the ones from the original patch. Apply with whitespace tolerance
    (e.g. git apply --ignore-whitespace) or regenerate with git diff.

diff --git a/examples/dem.py b/examples/dem.py
index 965b89d2df475f67bfeafe0dc2ecd33f718cbfb9..60e889b40e2bda6763f9818a938e59ec25c46355 100644
--- a/examples/dem.py
+++ b/examples/dem.py
@@ -122,9 +122,7 @@ minDiameter_SI = diameter_SI * 0.9
 maxDiameter_SI = diameter_SI * 1.1
 linkedCellWidth = 1.01 * maxDiameter_SI
 
-skin = 0.0
 ntypes = 1
-
 lnDryResCoeff = math.log(restitutionCoefficient);
 frictionStatic = 0.0
 frictionDynamic = frictionCoefficient
@@ -182,8 +180,7 @@ psim.setup(update_mass_and_inertia, {'densityParticle_SI': densityParticle_SI,
                                      'infinity': math.inf })
 
 #psim.compute_half()
-#psim.build_cell_lists(linkedCellWidth)
-psim.build_neighbor_lists(linkedCellWidth + skin)
+psim.build_cell_lists(linkedCellWidth)
 psim.vtk_output(f"output/dem_{target}", frequency=visSpacing)
 
 psim.compute(gravity,
@@ -193,7 +190,7 @@ psim.compute(gravity,
                       'pi': math.pi })
 
 psim.compute(linear_spring_dashpot,
-             linkedCellWidth + skin,
+             linkedCellWidth,
              symbols={'dt': dt_SI,
                       'pi': math.pi,
                       'kappa': kappa,
diff --git a/runtime/devices/cuda.cu b/runtime/devices/cuda.cu
index 687bb76600376008c95e6eda496c763c29b9938d..3e4f0672bf04fc924c8ce1683d01e999e5426dd0 100644
--- a/runtime/devices/cuda.cu
+++ b/runtime/devices/cuda.cu
@@ -42,6 +42,20 @@ __host__ void copy_to_host(const void *d_ptr, void *h_ptr, size_t count) {
     CUDA_ASSERT(cudaMemcpy(h_ptr, d_ptr, count, cudaMemcpyDeviceToHost));
 }
 
+// Copies `count` bytes from (h_ptr + offset) on the host to (d_ptr + offset) on the device; `offset` is in bytes.
+__host__ void copy_slice_to_device(const void *h_ptr, void *d_ptr, size_t offset, size_t count) {
+    void *d_ptr_start = static_cast<char *>(d_ptr) + offset;
+    const void *h_ptr_start = static_cast<const char *>(h_ptr) + offset;
+    CUDA_ASSERT(cudaMemcpy(d_ptr_start, h_ptr_start, count, cudaMemcpyHostToDevice));
+}
+
+// Copies `count` bytes from (d_ptr + offset) on the device to (h_ptr + offset) on the host; `offset` is in bytes.
+__host__ void copy_slice_to_host(const void *d_ptr, void *h_ptr, size_t offset, size_t count) {
+    const void *d_ptr_start = static_cast<const char *>(d_ptr) + offset;
+    void *h_ptr_start = static_cast<char *>(h_ptr) + offset;
+    CUDA_ASSERT(cudaMemcpy(h_ptr_start, d_ptr_start, count, cudaMemcpyDeviceToHost));
+}
+
 __host__ void copy_static_symbol_to_device(void *h_ptr, const void *d_ptr, size_t count) {
     CUDA_ASSERT(cudaMemcpyToSymbol(d_ptr, h_ptr, count));
 }
diff --git a/runtime/devices/device.hpp b/runtime/devices/device.hpp
index e48d0af95f9d32fc9318e16a12af0b5192f346ed..092cfaedc92f0b93d4f2f8ed7411fa87bebbaf96 100644
--- a/runtime/devices/device.hpp
+++ b/runtime/devices/device.hpp
@@ -17,6 +17,8 @@ __host__ void device_free(void *ptr);
 __host__ void device_synchronize();
 __host__ void copy_to_device(const void *h_ptr, void *d_ptr, size_t count);
 __host__ void copy_to_host(const void *d_ptr, void *h_ptr, size_t count);
+__host__ void copy_slice_to_device(const void *h_ptr, void *d_ptr, size_t offset, size_t count);
+__host__ void copy_slice_to_host(const void *d_ptr, void *h_ptr, size_t offset, size_t count);
 __host__ void copy_static_symbol_to_device(void *h_ptr, const void *d_ptr, size_t count);
 __host__ void copy_static_symbol_to_host(void *d_ptr, const void *h_ptr, size_t count);
 
diff --git a/runtime/devices/dummy.cpp b/runtime/devices/dummy.cpp
index 909d50ee7cddce3e3cc8131616e2051f7ea2b859..3b21e6850c08dd94124148c6bef73fe3c126b898 100644
--- a/runtime/devices/dummy.cpp
+++ b/runtime/devices/dummy.cpp
@@ -8,6 +8,8 @@ void device_free(void *ptr) {}
 void device_synchronize() {}
 void copy_to_device(void const *h_ptr, void *d_ptr, size_t count) {}
 void copy_to_host(void const *d_ptr, void *h_ptr, size_t count) {}
+void copy_slice_to_device(void const *h_ptr, void *d_ptr, size_t offset, size_t count) {}
+void copy_slice_to_host(void const *d_ptr, void *h_ptr, size_t offset, size_t count) {}
 void copy_static_symbol_to_device(void *h_ptr, const void *d_ptr, size_t count) {}
 void copy_static_symbol_to_host(void *d_ptr, const void *h_ptr, size_t count) {}
 
diff --git a/runtime/pairs.cpp b/runtime/pairs.cpp
index 7a11d6561ebebd6a9bcb6561378bc4a609d924b7..0b53437b13ffecdc4b80646d3b354aede3a284dd 100644
--- a/runtime/pairs.cpp
+++ b/runtime/pairs.cpp
@@ -158,6 +158,34 @@ FeatureProperty &PairsSimulation::getFeaturePropertyByName(std::string name) {
     return *fp;
 }
 
+// Copies `size` bytes at byte offset `offset` of `array` from host to device. The copy only happens for
+// non-static arrays when action is Ignore, or is WriteAfterRead/ReadOnly with the device flag not yet set.
+// Afterwards the device flag is set and, unless action is ReadOnly, the host flag is cleared.
+void PairsSimulation::copyArraySliceToDevice(
+    Array &array, action_t action, size_t offset, size_t size) {
+
+    int array_id = array.getId();
+
+    if(action == Ignore || action == WriteAfterRead || action == ReadOnly) {
+        if(action == Ignore || !array_flags->isDeviceFlagSet(array_id)) {
+            if(!array.isStatic()) {
+                PAIRS_DEBUG(
+                    "Copying array %s to device (offset=%zu, n=%zu)\n",
+                    array.getName().c_str(), offset, size);
+
+                pairs::copy_slice_to_device(
+                    array.getHostPointer(), array.getDevicePointer(), offset, size);
+            }
+        }
+    }
+
+    if(action != ReadOnly) {
+        array_flags->clearHostFlag(array_id);
+    }
+
+    array_flags->setDeviceFlag(array_id);
+}
+
 void PairsSimulation::copyArrayToDevice(Array &array, action_t action, size_t size) {
     int array_id = array.getId();
 
@@ -180,6 +208,32 @@ void PairsSimulation::copyArrayToDevice(Array &array, action_t action, size_t si
     array_flags->setDeviceFlag(array_id);
 }
 
+// Copies `size` bytes at byte offset `offset` of `array` from device to host. The copy only happens for
+// non-static arrays when action is Ignore, or is WriteAfterRead/ReadOnly with the host flag not yet set.
+// Afterwards the host flag is set and, unless action is ReadOnly, the device flag is cleared.
+void PairsSimulation::copyArraySliceToHost(Array &array, action_t action, size_t offset, size_t size) {
+    int array_id = array.getId();
+
+    if(action == Ignore || action == WriteAfterRead || action == ReadOnly) {
+        if(action == Ignore || !array_flags->isHostFlagSet(array_id)) {
+            if(!array.isStatic()) {
+                PAIRS_DEBUG(
+                    "Copying array %s to host (offset=%zu, n=%zu)\n",
+                    array.getName().c_str(), offset, size);
+
+                pairs::copy_slice_to_host(
+                    array.getDevicePointer(), array.getHostPointer(), offset, size);
+            }
+        }
+    }
+
+    if(action != ReadOnly) {
+        array_flags->clearDeviceFlag(array_id);
+    }
+
+    array_flags->setHostFlag(array_id);
+}
+
 void PairsSimulation::copyArrayToHost(Array &array, action_t action, size_t size) {
     int array_id = array.getId();
 
@@ -320,6 +374,15 @@ void PairsSimulation::communicateData(
         nrecv_all += nrecv[d * 2 + 1];
     }
 
+    /*
+    // TODO: this is hard-coded for 6D regular stencil, change it
+    int snd_offset = send_offsets[dim * 2 + 0] * elem_size * sizeof(real_t);
+    int rcv_offset = recv_offsets[dim * 2 + 0] * elem_size * sizeof(real_t);
+    int snd_size = (nsend[dim * 2 + 0] + nsend[dim * 2 + 1]) * elem_size * sizeof(real_t);
+    int rcv_size = (nrecv[dim * 2 + 0] + nrecv[dim * 2 + 1]) * elem_size * sizeof(real_t);
+    */
+
+    //copyArraySliceToHost(send_buf_array, Ignore, snd_offset, snd_size); // snd_size is already in bytes
     copyArrayToHost(send_buf_id, Ignore, nsend_all * elem_size * sizeof(real_t));
     array_flags->setHostFlag(recv_buf_id);
     array_flags->clearDeviceFlag(recv_buf_id);
@@ -331,6 +394,7 @@ void PairsSimulation::communicateData(
     this->getTimers()->stop(Communication);
 
     this->getTimers()->start(DeviceTransfers);
+    //copyArraySliceToDevice(recv_buf_array, Ignore, rcv_offset, rcv_size); // rcv_size is already in bytes
     copyArrayToDevice(recv_buf_id, Ignore, nrecv_all * elem_size * sizeof(real_t));
     this->getTimers()->stop(DeviceTransfers);
 }
diff --git a/runtime/pairs.hpp b/runtime/pairs.hpp
index 415d63467100f3846dce2dee0fced17b014929ee..8944dfda738602867d7a9fb768cf81fd7e083d90 100644
--- a/runtime/pairs.hpp
+++ b/runtime/pairs.hpp
@@ -93,6 +93,7 @@ public:
     }
 
     void copyArrayToDevice(Array &array, action_t action, size_t size);
+    void copyArraySliceToDevice(Array &array, action_t action, size_t offset, size_t size);
 
     void copyArrayToHost(array_t id, action_t action) {
         auto& array = getArray(id);
@@ -104,6 +105,7 @@ public:
     }
 
     void copyArrayToHost(Array &array, action_t action, size_t size);
+    void copyArraySliceToHost(Array &array, action_t action, size_t offset, size_t size);
 
     // Properties
     Property &getProperty(property_t id);