Skip to content
Snippets Groups Projects
Commit ae6287d2 authored by Behzad Safaei's avatar Behzad Safaei
Browse files

Add explicit sync modes

parent 98a3381a
Branches
Tags
No related merge requests found
......@@ -25,10 +25,9 @@ int main(int argc, char **argv) {
pairs_sim->create_halfspace(1,1,1, 0, 0, -1, 0, 13);
pairs::id_t pUid = pairs_sim->create_sphere(0.6, 0.6, 0.7, 0, 0, 0, 1000, 0.05, 0, 0);
pairs::id_t pUid2 = pairs_sim->create_sphere(0.4, 0.4, 0.76, 2, 2, 0, 1000, 0.05, 0, 0);
pairs_sim->create_sphere(0.4, 0.4, 0.76, 2, 2, 0, 1000, 0.05, 0, 0);
MPI_Allreduce(MPI_IN_PLACE, &pUid, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD);
MPI_Allreduce(MPI_IN_PLACE, &pUid2, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD);
auto pIsLocalInMyRank = [&](pairs::id_t uid){return ac->uidToIdxLocal(uid) != ac->getInvalidIdx();};
......
......@@ -42,10 +42,9 @@ int main(int argc, char **argv) {
pairs_sim->create_halfspace(1,1,1, 0, 0, -1, 0, 13);
pairs::id_t pUid = pairs_sim->create_sphere(0.6, 0.6, 0.7, 0, 0, 0, 1000, 0.05, 0, 0);
pairs::id_t pUid2 = pairs_sim->create_sphere(0.4, 0.4, 0.76, 2, 2, 0, 1000, 0.05, 0, 0);
pairs_sim->create_sphere(0.4, 0.4, 0.76, 2, 2, 0, 1000, 0.05, 0, 0);
MPI_Allreduce(MPI_IN_PLACE, &pUid, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD);
MPI_Allreduce(MPI_IN_PLACE, &pUid2, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD);
auto pIsLocalInMyRank = [&](pairs::id_t uid){return ac->uidToIdxLocal(uid) != ac->getInvalidIdx();};
......@@ -60,8 +59,8 @@ int main(int argc, char **argv) {
double dt = 1e-3;
for (int t=0; t<num_timesteps; ++t){
// Up-to-date uids might be on host or device. So sync uid before accessing them from host
ac->syncUid();
// Up-to-date uids might be on host or device, so sync the uids to the host before accessing them from the host
ac->syncUid(PairsAccessor::Host);
// Print position of particle pUid
//-------------------------------------------------------------------------------------------
......@@ -69,16 +68,20 @@ int main(int argc, char **argv) {
std::cout << "Timestep (" << t << "): Particle " << pUid << " is in rank " << pairs_sim->rank() << std::endl;
int idx = ac->uidToIdxLocal(pUid);
// Up-to-date position might be on host or device. So sync position before printing it from host:
ac->syncPosition();
// Up-to-date position might be on host or device.
// Sync position on HostAndDevice before printing it from host and device:
ac->syncPosition(PairsAccessor::HostAndDevice);
std::cout << "Position [from host] = ("
<< ac->getPosition(idx)[0] << ", "
<< ac->getPosition(idx)[1] << ", "
<< ac->getPosition(idx)[2] << ")" << std::endl;
// Position is still synced. Print it from device:
// Position is synced on both host and device. Print position from device:
print_position<<<1,1>>>(*ac, idx);
checkCudaError(cudaDeviceSynchronize(), "print_position");
// There's no need to sync position here to continue the simulation, since position wasn't modified.
}
// Calculate forces
......@@ -89,20 +92,24 @@ int main(int argc, char **argv) {
// Change gravitational force on particle pUid
//-------------------------------------------------------------------------------------------
ac->syncUid();
// Here we are syncing Uid on Host again for clarity, but no data transfer will happen since Uid is already on host
ac->syncUid(PairsAccessor::Host);
if(pIsLocalInMyRank(pUid)){
std::cout << "Force Timestep (" << t << "): Particle " << pUid << " is in rank " << pairs_sim->rank() << std::endl;
int idx = ac->uidToIdxLocal(pUid);
// Up-to-date force and mass might be on host or device. So sync before accessing them on device:
ac->syncForce();
ac->syncMass();
// Up-to-date force and mass might be on host or device.
// So sync them to the device before accessing them on the device. (No data will be transferred if they are already on the device.)
ac->syncForce(PairsAccessor::Device);
ac->syncMass(PairsAccessor::Device);
// Modify force from device:
change_gravitational_force<<<1,1>>>(*ac, idx);
checkCudaError(cudaDeviceSynchronize(), "change_gravitational_force");
// Force on device was modified. So sync force before printing it from host:
// Force on device was modified.
// So sync force before continuing the simulation. By default (no args), force is synced on both host and device
ac->syncForce();
std::cout << "Force [from host] after changing = ("
<< ac->getForce(idx)[0] << ", "
......
......@@ -28,17 +28,15 @@ class PairsAcessor:
self.print("public:")
self.print.add_indent(4)
if self.target.is_gpu():
self.update()
self.sync_mode_enum()
self.update()
self.constructor()
for p in self.sim.properties:
if (p.type()==Types.Vector) or (Types.is_scalar(p.type())):
self.get_property(p)
self.set_property(p)
if self.target.is_gpu():
self.sync_property(p)
self.sync_property(p)
self.utility_funcs()
......@@ -73,15 +71,16 @@ class PairsAcessor:
def update(self):
self.print(f"{self.host_attr}void update(){{")
self.print.add_indent(4)
self.print(f"cudaMemcpy(nlocal_d, &(ps->pobj->nlocal), sizeof(int), cudaMemcpyHostToDevice);")
self.print(f"cudaMemcpy(nghost_d, &(ps->pobj->nghost), sizeof(int), cudaMemcpyHostToDevice);")
if self.target.is_gpu():
self.print.add_indent(4)
self.print(f"cudaMemcpy(nlocal_d, &(ps->pobj->nlocal), sizeof(int), cudaMemcpyHostToDevice);")
self.print(f"cudaMemcpy(nghost_d, &(ps->pobj->nghost), sizeof(int), cudaMemcpyHostToDevice);")
for p in self.sim.properties:
pname = p.name()
self.print(f"{pname}_d = ps->pobj->{pname}_d;")
for p in self.sim.properties:
pname = p.name()
self.print(f"{pname}_d = ps->pobj->{pname}_d;")
self.print.add_indent(-4)
self.print.add_indent(-4)
self.print("}")
self.print("")
......@@ -193,6 +192,13 @@ class PairsAcessor:
self.print("}")
self.print("")
# Emit the C++ `SyncMode` enum through self.print, one output line per call.
# The enumerators are HostAndDevice (explicitly 0), Host and Device, in that order.
# The generated sync<Property>() signature printed by sync_property uses this type
# for its `sync_mode` parameter, with HostAndDevice as the default value.
def sync_mode_enum(self):
self.print("enum SyncMode{")
self.print(" HostAndDevice = 0,")
self.print(" Host,")
self.print(" Device")
self.print("};")
def sync_property(self, prop):
pname = prop.name()
......@@ -200,91 +206,83 @@ class PairsAcessor:
splitname = pname.split('_')
funcname = ''.join(word.capitalize() for word in splitname)
self.print(f"{self.host_attr}void sync{funcname}(){{")
self.print.add_indent(4)
self.print(f"{pname}_d = ps->pobj->{pname}_d;")
self.print(f"cudaMemcpy(&{pname}_device_flag_h, {pname}_device_flag_d, sizeof(bool), cudaMemcpyDeviceToHost);")
self.print("")
#####################################################################################################################
#####################################################################################################################
# self.print(f"if (({pname}_host_flag && {pname}_device_flag_h) || ")
# self.print.add_indent(4)
# self.print(f"({pname}_host_flag && !ps->pairs_runtime->getPropFlags()->isHostFlagSet({pid})) ||")
# self.print(f"({pname}_device_flag_h && !ps->pairs_runtime->getPropFlags()->isDeviceFlagSet({pid}))){{")
# self.print(f"PAIRS_ERROR(\"OUT OF SYNC! Both host and device versions of {pname} are in a modified state.\\n\");")
# self.print("exit(-1);")
# self.print.add_indent(-4)
# self.print("}")
# self.print("")
self.print(f"{self.host_attr}void sync{funcname}(SyncMode sync_mode = HostAndDevice){{")
if self.target.is_gpu():
self.print.add_indent(4)
self.print(f"{pname}_d = ps->pobj->{pname}_d;")
self.print(f"cudaMemcpy(&{pname}_device_flag_h, {pname}_device_flag_d, sizeof(bool), cudaMemcpyDeviceToHost);")
self.print("")
self.print(f"if ({pname}_host_flag && {pname}_device_flag_h){{")
self.print.add_indent(4)
self.print(f"PAIRS_ERROR(\"OUT OF SYNC 1! Both host and device versions of {pname} are in a modified state.\\n\");")
self.print("exit(-1);")
self.print.add_indent(-4)
self.print("}")
self.print("")
self.print(f"if ({pname}_host_flag && !ps->pairs_runtime->getPropFlags()->isHostFlagSet({pid})){{")
self.print.add_indent(4)
self.print(f"PAIRS_ERROR(\"OUT OF SYNC 2! Both host and device versions of {pname} are in a modified state.\\n\");")
self.print("exit(-1);")
self.print.add_indent(-4)
self.print("}")
self.print("")
#####################################################################################################################
#####################################################################################################################
# self.print(f"if (({pname}_host_flag && {pname}_device_flag_h) || ")
# self.print.add_indent(4)
# self.print(f"({pname}_host_flag && !ps->pairs_runtime->getPropFlags()->isHostFlagSet({pid})) ||")
# self.print(f"({pname}_device_flag_h && !ps->pairs_runtime->getPropFlags()->isDeviceFlagSet({pid}))){{")
# self.print(f"PAIRS_ERROR(\"OUT OF SYNC! Both host and device versions of {pname} are in a modified state.\\n\");")
# self.print("exit(-1);")
# self.print.add_indent(-4)
# self.print("}")
# self.print("")
self.print(f"if ({pname}_host_flag && {pname}_device_flag_h){{")
self.print(f" PAIRS_ERROR(\"OUT OF SYNC 1! Both host and device versions of {pname} are in a modified state.\\n\");")
self.print(" exit(-1);")
self.print("}")
self.print("")
self.print(f"if ({pname}_device_flag_h && !ps->pairs_runtime->getPropFlags()->isDeviceFlagSet({pid})){{")
self.print.add_indent(4)
self.print(f"PAIRS_ERROR(\"OUT OF SYNC 3! Both host and device versions of {pname} are in a modified state.\\n\");")
self.print("exit(-1);")
self.print.add_indent(-4)
self.print("}")
self.print("")
self.print(f"if ({pname}_host_flag && !ps->pairs_runtime->getPropFlags()->isHostFlagSet({pid})){{")
self.print(f" PAIRS_ERROR(\"OUT OF SYNC 2! Both host and device versions of {pname} are in a modified state.\\n\");")
self.print(" exit(-1);")
self.print("}")
self.print("")
#####################################################################################################################
#####################################################################################################################
self.print(f"if ({pname}_device_flag_h && !ps->pairs_runtime->getPropFlags()->isDeviceFlagSet({pid})){{")
self.print(f" PAIRS_ERROR(\"OUT OF SYNC 3! Both host and device versions of {pname} are in a modified state.\\n\");")
self.print(" exit(-1);")
self.print("}")
self.print("")
#####################################################################################################################
#####################################################################################################################
self.print(f"if ({pname}_host_flag){{")
self.print.add_indent(4)
self.print(f"ps->pairs_runtime->getPropFlags()->setHostFlag({pid});")
self.print(f"ps->pairs_runtime->getPropFlags()->clearDeviceFlag({pid});")
self.print.add_indent(-4)
self.print("}")
self.print(f"else if ({pname}_device_flag_h){{")
self.print.add_indent(4)
self.print(f"ps->pairs_runtime->getPropFlags()->setDeviceFlag({pid});")
self.print(f"ps->pairs_runtime->getPropFlags()->clearHostFlag({pid});")
self.print.add_indent(-4)
self.print("}")
self.print("")
nelems = Types.number_of_elements(self.sim, prop.type())
tkw = Types.c_keyword(self.sim, prop.type())
self.print(f"if ({pname}_host_flag){{")
self.print(f" ps->pairs_runtime->getPropFlags()->setHostFlag({pid});")
self.print(f" ps->pairs_runtime->getPropFlags()->clearDeviceFlag({pid});")
self.print("}")
self.print(f"else if ({pname}_device_flag_h){{")
self.print(f" ps->pairs_runtime->getPropFlags()->setDeviceFlag({pid});")
self.print(f" ps->pairs_runtime->getPropFlags()->clearHostFlag({pid});")
self.print("}")
self.print("")
self.print(f"if (ps->pairs_runtime->getPropFlags()->isHostFlagSet({pid})) {{")
self.print.add_indent(4)
nelems = Types.number_of_elements(self.sim, prop.type())
tkw = Types.c_keyword(self.sim, prop.type())
self.print(f"ps->pairs_runtime->copyPropertyToDevice({pid}, ReadOnly, (((ps->pobj->nlocal + ps->pobj->nghost) * {nelems}) * sizeof({tkw})));")
self.print.add_indent(-4)
self.print("}")
self.print(f"if (sync_mode==Device || sync_mode==HostAndDevice) {{")
self.print(f" if (ps->pairs_runtime->getPropFlags()->isHostFlagSet({pid})) {{")
self.print(f" ps->pairs_runtime->copyPropertyToDevice({pid}, ReadOnly, (((ps->pobj->nlocal + ps->pobj->nghost) * {nelems}) * sizeof({tkw})));")
self.print(" }")
self.print("}")
self.print("")
self.print(f"else if (ps->pairs_runtime->getPropFlags()->isDeviceFlagSet({pid})) {{")
self.print.add_indent(4)
self.print(f"ps->pairs_runtime->copyPropertyToHost({pid}, ReadOnly, (((ps->pobj->nlocal + ps->pobj->nghost) * {nelems}) * sizeof({tkw})));")
self.print.add_indent(-4)
self.print("}")
self.print("")
self.print(f"if (sync_mode==Host || sync_mode==HostAndDevice) {{")
self.print(f" if (ps->pairs_runtime->getPropFlags()->isDeviceFlagSet({pid})) {{")
self.print(f" ps->pairs_runtime->copyPropertyToHost({pid}, ReadOnly, (((ps->pobj->nlocal + ps->pobj->nghost) * {nelems}) * sizeof({tkw})));")
self.print(" }")
self.print("}")
self.print("")
self.print(f"{pname}_host_flag = false;")
self.print(f"{pname}_device_flag_h = false;")
self.print(f"cudaMemcpy({pname}_device_flag_d, &{pname}_device_flag_h, sizeof(bool), cudaMemcpyHostToDevice);")
self.print(f"{pname}_host_flag = false;")
self.print(f"{pname}_device_flag_h = false;")
self.print(f"cudaMemcpy({pname}_device_flag_d, &{pname}_device_flag_h, sizeof(bool), cudaMemcpyHostToDevice);")
self.print.add_indent(-4)
self.print.add_indent(-4)
self.print("}")
self.print("")
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment