diff --git a/examples/modular/sd_sample_3_CPU.cpp b/examples/modular/sd_sample_3_CPU.cpp index ad532f473ebeb7bf978139ac39c5f8d5af1f6b5f..315a7a2a8b67023ed2a5440df9f301d2a6c5059c 100644 --- a/examples/modular/sd_sample_3_CPU.cpp +++ b/examples/modular/sd_sample_3_CPU.cpp @@ -25,10 +25,9 @@ int main(int argc, char **argv) { pairs_sim->create_halfspace(1,1,1, 0, 0, -1, 0, 13); pairs::id_t pUid = pairs_sim->create_sphere(0.6, 0.6, 0.7, 0, 0, 0, 1000, 0.05, 0, 0); - pairs::id_t pUid2 = pairs_sim->create_sphere(0.4, 0.4, 0.76, 2, 2, 0, 1000, 0.05, 0, 0); + pairs_sim->create_sphere(0.4, 0.4, 0.76, 2, 2, 0, 1000, 0.05, 0, 0); MPI_Allreduce(MPI_IN_PLACE, &pUid, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(MPI_IN_PLACE, &pUid2, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD); auto pIsLocalInMyRank = [&](pairs::id_t uid){return ac->uidToIdxLocal(uid) != ac->getInvalidIdx();}; diff --git a/examples/modular/sd_sample_3_GPU.cu b/examples/modular/sd_sample_3_GPU.cu index ef379e59a46cbc3db041dc92c55e8c1f157c45df..964ede0c516999fb5aea1d7361e4fc19ff4ee088 100644 --- a/examples/modular/sd_sample_3_GPU.cu +++ b/examples/modular/sd_sample_3_GPU.cu @@ -42,10 +42,9 @@ int main(int argc, char **argv) { pairs_sim->create_halfspace(1,1,1, 0, 0, -1, 0, 13); pairs::id_t pUid = pairs_sim->create_sphere(0.6, 0.6, 0.7, 0, 0, 0, 1000, 0.05, 0, 0); - pairs::id_t pUid2 = pairs_sim->create_sphere(0.4, 0.4, 0.76, 2, 2, 0, 1000, 0.05, 0, 0); + pairs_sim->create_sphere(0.4, 0.4, 0.76, 2, 2, 0, 1000, 0.05, 0, 0); MPI_Allreduce(MPI_IN_PLACE, &pUid, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(MPI_IN_PLACE, &pUid2, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD); auto pIsLocalInMyRank = [&](pairs::id_t uid){return ac->uidToIdxLocal(uid) != ac->getInvalidIdx();}; @@ -60,8 +59,8 @@ int main(int argc, char **argv) { double dt = 1e-3; for (int t=0; t<num_timesteps; ++t){ - // Up-to-date uids might be on host or device. 
So sync uid before accessing them from host - ac->syncUid(); + // Up-to-date uids might be on host or device. So sync uid in Host before accessing them from host + ac->syncUid(PairsAccessor::Host); // Print position of particle pUid //------------------------------------------------------------------------------------------- @@ -69,16 +68,20 @@ int main(int argc, char **argv) { std::cout << "Timestep (" << t << "): Particle " << pUid << " is in rank " << pairs_sim->rank() << std::endl; int idx = ac->uidToIdxLocal(pUid); - // Up-to-date position might be on host or device. So sync position before printing it from host: - ac->syncPosition(); + // Up-to-date position might be on host or device. + // Sync position on HostAndDevice before printing it from host and device: + ac->syncPosition(PairsAccessor::HostAndDevice); + std::cout << "Position [from host] = (" << ac->getPosition(idx)[0] << ", " << ac->getPosition(idx)[1] << ", " << ac->getPosition(idx)[2] << ")" << std::endl; - // Position is still synced. Print it from device: + // Position is synced on both host and device. Print position from device: print_position<<<1,1>>>(*ac, idx); checkCudaError(cudaDeviceSynchronize(), "print_position"); + + // There's no need to sync position here to continue the simulation, since position wasn't modified. } // Calculate forces @@ -89,20 +92,24 @@ int main(int argc, char **argv) { // Change gravitational force on particle pUid //------------------------------------------------------------------------------------------- - ac->syncUid(); + // Here we are syncing Uid on Host again for clarity, but no data transfer will happen since Uid is already on host + ac->syncUid(PairsAccessor::Host); + if(pIsLocalInMyRank(pUid)){ std::cout << "Force Timestep (" << t << "): Particle " << pUid << " is in rank " << pairs_sim->rank() << std::endl; int idx = ac->uidToIdxLocal(pUid); - // Up-to-date force and mass might be on host or device. 
So sync before accessing them on device: - ac->syncForce(); - ac->syncMass(); + // Up-to-date force and mass might be on host or device. + // So sync them in Device before accessing them on device. (No data will be transferred if they are already on device) + ac->syncForce(PairsAccessor::Device); + ac->syncMass(PairsAccessor::Device); // Modify force from device: change_gravitational_force<<<1,1>>>(*ac, idx); checkCudaError(cudaDeviceSynchronize(), "change_gravitational_force"); - // Force on device was modified. So sync force before printing it from host: + // Force on device was modified. + // So sync force before continuing the simulation. By default (no args), force is synced on both host and device ac->syncForce(); std::cout << "Force [from host] after changing = (" << ac->getForce(idx)[0] << ", " diff --git a/src/pairs/code_gen/accessor.py b/src/pairs/code_gen/accessor.py index 9996a6f1b83c0cda0e3b6261545b0f30c34e477c..cd5a9869552aba8817b9dbb566933c8983acbc60 100644 --- a/src/pairs/code_gen/accessor.py +++ b/src/pairs/code_gen/accessor.py @@ -28,17 +28,15 @@ class PairsAcessor: self.print("public:") self.print.add_indent(4) - if self.target.is_gpu(): - self.update() - + self.sync_mode_enum() + self.update() self.constructor() for p in self.sim.properties: if (p.type()==Types.Vector) or (Types.is_scalar(p.type())): self.get_property(p) self.set_property(p) - if self.target.is_gpu(): - self.sync_property(p) + self.sync_property(p) self.utility_funcs() @@ -73,15 +71,16 @@ class PairsAcessor: def update(self): self.print(f"{self.host_attr}void update(){{") - self.print.add_indent(4) - self.print(f"cudaMemcpy(nlocal_d, &(ps->pobj->nlocal), sizeof(int), cudaMemcpyHostToDevice);") - self.print(f"cudaMemcpy(nghost_d, &(ps->pobj->nghost), sizeof(int), cudaMemcpyHostToDevice);") + if self.target.is_gpu(): + self.print.add_indent(4) + self.print(f"cudaMemcpy(nlocal_d, &(ps->pobj->nlocal), sizeof(int), cudaMemcpyHostToDevice);") + self.print(f"cudaMemcpy(nghost_d, 
&(ps->pobj->nghost), sizeof(int), cudaMemcpyHostToDevice);") - for p in self.sim.properties: - pname = p.name() - self.print(f"{pname}_d = ps->pobj->{pname}_d;") + for p in self.sim.properties: + pname = p.name() + self.print(f"{pname}_d = ps->pobj->{pname}_d;") - self.print.add_indent(-4) + self.print.add_indent(-4) self.print("}") self.print("") @@ -193,6 +192,13 @@ class PairsAcessor: self.print("}") self.print("") + + def sync_mode_enum(self): + self.print("enum SyncMode{") + self.print(" HostAndDevice = 0,") + self.print(" Host,") + self.print(" Device") + self.print("};") def sync_property(self, prop): pname = prop.name() @@ -200,91 +206,83 @@ class PairsAcessor: splitname = pname.split('_') funcname = ''.join(word.capitalize() for word in splitname) - self.print(f"{self.host_attr}void sync{funcname}(){{") - self.print.add_indent(4) - self.print(f"{pname}_d = ps->pobj->{pname}_d;") - self.print(f"cudaMemcpy(&{pname}_device_flag_h, {pname}_device_flag_d, sizeof(bool), cudaMemcpyDeviceToHost);") - self.print("") - - - ##################################################################################################################### - ##################################################################################################################### - # self.print(f"if (({pname}_host_flag && {pname}_device_flag_h) || ") - # self.print.add_indent(4) - # self.print(f"({pname}_host_flag && !ps->pairs_runtime->getPropFlags()->isHostFlagSet({pid})) ||") - # self.print(f"({pname}_device_flag_h && !ps->pairs_runtime->getPropFlags()->isDeviceFlagSet({pid}))){{") - # self.print(f"PAIRS_ERROR(\"OUT OF SYNC! 
Both host and device versions of {pname} are in a modified state.\\n\");") - # self.print("exit(-1);") - # self.print.add_indent(-4) - # self.print("}") - # self.print("") + self.print(f"{self.host_attr}void sync{funcname}(SyncMode sync_mode = HostAndDevice){{") + if self.target.is_gpu(): + self.print.add_indent(4) + self.print(f"{pname}_d = ps->pobj->{pname}_d;") + self.print(f"cudaMemcpy(&{pname}_device_flag_h, {pname}_device_flag_d, sizeof(bool), cudaMemcpyDeviceToHost);") + self.print("") + - self.print(f"if ({pname}_host_flag && {pname}_device_flag_h){{") - self.print.add_indent(4) - self.print(f"PAIRS_ERROR(\"OUT OF SYNC 1! Both host and device versions of {pname} are in a modified state.\\n\");") - self.print("exit(-1);") - self.print.add_indent(-4) - self.print("}") - self.print("") - - self.print(f"if ({pname}_host_flag && !ps->pairs_runtime->getPropFlags()->isHostFlagSet({pid})){{") - self.print.add_indent(4) - self.print(f"PAIRS_ERROR(\"OUT OF SYNC 2! Both host and device versions of {pname} are in a modified state.\\n\");") - self.print("exit(-1);") - self.print.add_indent(-4) - self.print("}") - self.print("") + ##################################################################################################################### + ##################################################################################################################### + # self.print(f"if (({pname}_host_flag && {pname}_device_flag_h) || ") + # self.print.add_indent(4) + # self.print(f"({pname}_host_flag && !ps->pairs_runtime->getPropFlags()->isHostFlagSet({pid})) ||") + # self.print(f"({pname}_device_flag_h && !ps->pairs_runtime->getPropFlags()->isDeviceFlagSet({pid}))){{") + # self.print(f"PAIRS_ERROR(\"OUT OF SYNC! 
Both host and device versions of {pname} are in a modified state.\\n\");") + # self.print("exit(-1);") + # self.print.add_indent(-4) + # self.print("}") + # self.print("") + + + self.print(f"if ({pname}_host_flag && {pname}_device_flag_h){{") + self.print(f" PAIRS_ERROR(\"OUT OF SYNC 1! Both host and device versions of {pname} are in a modified state.\\n\");") + self.print(" exit(-1);") + self.print("}") + self.print("") - self.print(f"if ({pname}_device_flag_h && !ps->pairs_runtime->getPropFlags()->isDeviceFlagSet({pid})){{") - self.print.add_indent(4) - self.print(f"PAIRS_ERROR(\"OUT OF SYNC 3! Both host and device versions of {pname} are in a modified state.\\n\");") - self.print("exit(-1);") - self.print.add_indent(-4) - self.print("}") - self.print("") + self.print(f"if ({pname}_host_flag && !ps->pairs_runtime->getPropFlags()->isHostFlagSet({pid})){{") + self.print(f" PAIRS_ERROR(\"OUT OF SYNC 2! Both host and device versions of {pname} are in a modified state.\\n\");") + self.print(" exit(-1);") + self.print("}") + self.print("") - ##################################################################################################################### - ##################################################################################################################### + self.print(f"if ({pname}_device_flag_h && !ps->pairs_runtime->getPropFlags()->isDeviceFlagSet({pid})){{") + self.print(f" PAIRS_ERROR(\"OUT OF SYNC 3! 
Both host and device versions of {pname} are in a modified state.\\n\");") + self.print(" exit(-1);") + self.print("}") + self.print("") + ##################################################################################################################### + ##################################################################################################################### - self.print(f"if ({pname}_host_flag){{") - self.print.add_indent(4) - self.print(f"ps->pairs_runtime->getPropFlags()->setHostFlag({pid});") - self.print(f"ps->pairs_runtime->getPropFlags()->clearDeviceFlag({pid});") - self.print.add_indent(-4) - self.print("}") - - self.print(f"else if ({pname}_device_flag_h){{") - self.print.add_indent(4) - self.print(f"ps->pairs_runtime->getPropFlags()->setDeviceFlag({pid});") - self.print(f"ps->pairs_runtime->getPropFlags()->clearHostFlag({pid});") - self.print.add_indent(-4) - self.print("}") - self.print("") - nelems = Types.number_of_elements(self.sim, prop.type()) - tkw = Types.c_keyword(self.sim, prop.type()) + self.print(f"if ({pname}_host_flag){{") + self.print(f" ps->pairs_runtime->getPropFlags()->setHostFlag({pid});") + self.print(f" ps->pairs_runtime->getPropFlags()->clearDeviceFlag({pid});") + self.print("}") + + self.print(f"else if ({pname}_device_flag_h){{") + self.print(f" ps->pairs_runtime->getPropFlags()->setDeviceFlag({pid});") + self.print(f" ps->pairs_runtime->getPropFlags()->clearHostFlag({pid});") + self.print("}") + self.print("") - self.print(f"if (ps->pairs_runtime->getPropFlags()->isHostFlagSet({pid})) {{") - self.print.add_indent(4) + nelems = Types.number_of_elements(self.sim, prop.type()) + tkw = Types.c_keyword(self.sim, prop.type()) - self.print(f"ps->pairs_runtime->copyPropertyToDevice({pid}, ReadOnly, (((ps->pobj->nlocal + ps->pobj->nghost) * {nelems}) * sizeof({tkw})));") - self.print.add_indent(-4) - self.print("}") + self.print(f"if (sync_mode==Device || sync_mode==HostAndDevice) {{") + self.print(f" if 
(ps->pairs_runtime->getPropFlags()->isHostFlagSet({pid})) {{") + self.print(f" ps->pairs_runtime->copyPropertyToDevice({pid}, ReadOnly, (((ps->pobj->nlocal + ps->pobj->nghost) * {nelems}) * sizeof({tkw})));") + self.print(" }") + self.print("}") + self.print("") - self.print(f"else if (ps->pairs_runtime->getPropFlags()->isDeviceFlagSet({pid})) {{") - self.print.add_indent(4) - self.print(f"ps->pairs_runtime->copyPropertyToHost({pid}, ReadOnly, (((ps->pobj->nlocal + ps->pobj->nghost) * {nelems}) * sizeof({tkw})));") - self.print.add_indent(-4) - self.print("}") - self.print("") + self.print(f"if (sync_mode==Host || sync_mode==HostAndDevice) {{") + self.print(f" if (ps->pairs_runtime->getPropFlags()->isDeviceFlagSet({pid})) {{") + self.print(f" ps->pairs_runtime->copyPropertyToHost({pid}, ReadOnly, (((ps->pobj->nlocal + ps->pobj->nghost) * {nelems}) * sizeof({tkw})));") + self.print(" }") + self.print("}") + self.print("") - self.print(f"{pname}_host_flag = false;") - self.print(f"{pname}_device_flag_h = false;") - self.print(f"cudaMemcpy({pname}_device_flag_d, &{pname}_device_flag_h, sizeof(bool), cudaMemcpyHostToDevice);") + self.print(f"{pname}_host_flag = false;") + self.print(f"{pname}_device_flag_h = false;") + self.print(f"cudaMemcpy({pname}_device_flag_d, &{pname}_device_flag_h, sizeof(bool), cudaMemcpyHostToDevice);") - self.print.add_indent(-4) + self.print.add_indent(-4) self.print("}") self.print("")