Skip to content
Snippets Groups Projects
Commit 8ba950c9 authored by Behzad Safaei's avatar Behzad Safaei
Browse files

Revise accessor: use pointers to structs for device and host

parent 65004c9c
No related merge requests found
from pairs.ir.types import Types
from pairs.ir.features import FeatureProperty
from pairs.ir.properties import Property
class PairsAcessor:
def __init__(self, cgen):
......@@ -12,6 +14,15 @@ class PairsAcessor:
def generate(self):
self.print("")
if self.target.is_gpu():
self.print("namespace pairs::internal{")
self.print.add_indent(4)
self.DeviceProps_struct()
self.HostProps_struct()
self.print.add_indent(-4)
self.print("}")
self.print("")
if self.target.is_gpu():
self.host_device_attr = "__host__ __device__ "
self.host_attr = "__host__ "
......@@ -28,9 +39,10 @@ class PairsAcessor:
self.print("public:")
self.print.add_indent(4)
self.sync_mode_enum()
self.sync_ctx_enum()
self.update()
self.constructor()
self.destructor()
for p in self.sim.properties:
if (p.type()==Types.Vector) or (Types.is_scalar(p.type())):
......@@ -39,8 +51,8 @@ class PairsAcessor:
self.sync_property(p)
for fp in self.sim.feature_properties:
self.get_feature_property(fp)
self.set_feature_property(fp)
self.get_property(fp)
self.set_property(fp)
self.sync_feature_property(fp)
self.utility_funcs()
......@@ -49,184 +61,141 @@ class PairsAcessor:
self.print("};")
self.print("")
def get_fp_body(self, fp, device=False):
def DeviceProps_struct(self):
self.print("struct DeviceProps{")
self.print.add_indent(4)
fp_name = fp.name()
f_name = fp.feature().name()
tkw = Types.c_accessor_keyword(self.sim, fp.type())
if self.target.is_gpu() and device:
v = f"{fp_name}_d"
else:
v = f"ps->pobj->{fp_name}"
idx = f"{fp.feature().nkinds()}*{f_name}1 + {f_name}2"
if Types.is_scalar(fp.type()):
self.print(f"return {v}[{idx}];")
else:
nelems = Types.number_of_elements(self.sim, fp.type())
return_values = [f"{v}[({idx})*{nelems} + {n}]" for n in range(nelems)]
self.print(f"return {tkw}(" + ", ".join(rv for rv in return_values) + ");")
self.print.add_indent(-4)
def get_feature_property(self, fp):
fp_name = fp.name()
tkw = Types.c_accessor_keyword(self.sim, fp.type())
f_name = fp.feature().name()
splitname = f_name.split('_') + fp_name.split('_')
funcname = ''.join(word.capitalize() for word in splitname)
self.print(f"{self.host_device_attr}{tkw} get{funcname}(const size_t {f_name}1, const size_t {f_name}2) const{{")
if self.target.is_gpu():
self.ifdef_else("__CUDA_ARCH__", self.get_fp_body, [fp, True], self.get_fp_body, [fp, False])
else:
self.get_fp_body(fp, False)
self.print("}")
self.print("int nlocal;")
self.print("int nghost;")
self.print("")
def set_fp_body(self, fp, device=False):
self.print.add_indent(4)
fp_name = fp.name()
f_name = fp.feature().name()
tkw = Types.c_accessor_keyword(self.sim, fp.type())
if self.target.is_gpu() and device:
v = f"{fp_name}_d"
else:
v = f"ps->pobj->{fp_name}"
self.print("//Property device pointers")
for p in self.sim.properties:
pname = p.name()
tkw = Types.c_keyword(self.sim, p.type())
self.print(f"{tkw} *{pname}_d;")
idx = f"{fp.feature().nkinds()}*{f_name}1 + {f_name}2"
self.print("")
self.print("//Property device flag pointers")
for p in self.sim.properties:
pname = p.name()
tkw = Types.c_keyword(self.sim, Types.Boolean)
self.print(f"{tkw} *{pname}_device_flag_d;")
if Types.is_scalar(fp.type()):
self.print(f"{v}[{idx}] = value;")
else:
nelems = Types.number_of_elements(self.sim, fp.type())
set_values = [f"{v}[({idx})*{nelems} + {n}] = value[{n}];" for n in range(nelems)]
for sv in set_values:
self.print(sv)
self.print("")
self.print("//Feature properties on device are global")
if self.target.is_gpu():
flag = f"*{fp_name}_device_flag_d" if device else f"{fp_name}_host_flag"
self.print(f"{flag} = true;")
self.print("")
self.print("//Feature properties have no flags on device since they can't be modified on device")
self.print.add_indent(-4)
self.print("};")
self.print("")
def HostProps_struct(self):
self.print("// HostProps only contains property flags, since properties themselves can be directly accessed through ps->pobj")
self.print("// TODO: Move properties out of PairsObjects into DeviceProps and HostProps, so that all 3 structs have mutually exclusive members")
self.print("struct HostProps{")
self.print.add_indent(4)
def set_feature_property(self, fp):
fp_name = fp.name()
tkw = Types.c_accessor_keyword(self.sim, fp.type())
f_name = fp.feature().name()
self.print("")
self.print("//Property host pointers are in PairsObjects")
splitname = f_name.split('_') + fp_name.split('_')
funcname = ''.join(word.capitalize() for word in splitname)
self.print("")
self.print("//Property host flags")
for p in self.sim.properties:
pname = p.name()
tkw = Types.c_keyword(self.sim, Types.Boolean)
self.print(f"{tkw} {pname}_host_flag = false;")
# Feature properties can only be set from host
self.print(f"{self.host_attr}void set{funcname}(const size_t {f_name}1, const size_t {f_name}2, const {tkw} &value){{")
self.set_fp_body(fp, False)
self.print("}")
self.print("")
self.print("//Property device flags")
for p in self.sim.properties:
pname = p.name()
tkw = Types.c_keyword(self.sim, Types.Boolean)
self.print(f"{tkw} {pname}_device_flag_h = false;")
def sync_feature_property(self, fp):
fp_id = fp.id()
fp_name = fp.name()
f_name = fp.feature().name()
splitname = f_name.split('_') + fp_name.split('_')
funcname = ''.join(word.capitalize() for word in splitname)
self.print(f"{self.host_attr}void sync{funcname}(SyncMode sync_mode = HostAndDevice){{")
if self.target.is_gpu():
self.print.add_indent(4)
self.print(f"if ({fp_name}_host_flag) {{")
self.print(f" ps->pairs_runtime->copyFeaturePropertyToDevice({fp_id});")
self.print("}")
self.print("")
self.print(f"{fp_name}_host_flag = false;")
self.print.add_indent(-4)
self.print("")
self.print("//Feature property host pointers are in PairsObjects")
self.print("}")
self.print("")
self.print("//Feature property host flags")
for fp in self.sim.feature_properties:
fpname = fp.name()
tkw = Types.c_keyword(self.sim, Types.Boolean)
self.print(f"{tkw} {fpname}_host_flag = false;")
self.print("")
self.print("//Feature properties have no device flags")
self.print.add_indent(-4)
self.print("};")
self.print("")
def member_variables(self):
self.print("PairsSimulation *ps;")
if self.target.is_gpu():
self.print("int *nlocal_d;")
self.print("int *nghost_d;")
self.print("")
self.print("//Properties")
for p in self.sim.properties:
pname = p.name()
tkw = Types.c_keyword(self.sim, p.type())
self.print(f"{tkw} *{pname}_d;")
self.print("")
self.print("//Property flags")
for p in self.sim.properties:
pname = p.name()
tkw = Types.c_keyword(self.sim, Types.Boolean)
self.print(f"{tkw} *{pname}_device_flag_d;")
self.print(f"{tkw} {pname}_device_flag_h = false;")
self.print(f"{tkw} {pname}_host_flag = false;")
self.print("")
self.print("//Feature properties are global")
self.print("")
self.print("//Feature property flags")
for fp in self.sim.feature_properties:
fpname = fp.name()
tkw = Types.c_keyword(self.sim, Types.Boolean)
self.print(f"{tkw} {fpname}_host_flag = false;")
self.print("")
self.print("pairs::internal::HostProps *hp;")
self.print("pairs::internal::DeviceProps *dp_h;")
self.print("pairs::internal::DeviceProps *dp_d;")
def update(self):
self.print(f"{self.host_attr}void update(){{")
if self.target.is_gpu():
self.print.add_indent(4)
self.print(f"cudaMemcpy(nlocal_d, &(ps->pobj->nlocal), sizeof(int), cudaMemcpyHostToDevice);")
self.print(f"cudaMemcpy(nghost_d, &(ps->pobj->nghost), sizeof(int), cudaMemcpyHostToDevice);")
self.print(f"dp_h->nlocal = ps->pobj->nlocal;")
self.print(f"dp_h->nghost = ps->pobj->nghost;")
for p in self.sim.properties:
pname = p.name()
self.print(f"{pname}_d = ps->pobj->{pname}_d;")
self.print(f"dp_h->{pname}_d = ps->pobj->{pname}_d;")
self.print(f"cudaMemcpy(dp_d, dp_h, sizeof(pairs::internal::DeviceProps), cudaMemcpyHostToDevice);")
self.print.add_indent(-4)
self.print("}")
self.print("")
def constructor(self):
if self.target.is_gpu():
self.print(f"{self.host_attr}PairsAccessor(PairsSimulation *ps_): ps(ps_){{")
self.print.add_indent(4)
self.print(f"cudaMalloc(&nlocal_d, sizeof(int));")
self.print(f"cudaMalloc(&nghost_d, sizeof(int));")
self.print("this->update();")
self.print(f"hp = new pairs::internal::HostProps;")
self.print(f"dp_h = new pairs::internal::DeviceProps;")
self.print(f"cudaMalloc(&dp_d, sizeof(pairs::internal::DeviceProps));")
for p in self.sim.properties:
pname = p.name()
tkw = Types.c_keyword(self.sim, Types.Boolean)
self.print(f"cudaMalloc(&{pname}_device_flag_d, sizeof({tkw}));")
self.print(f"cudaMemcpy({pname}_device_flag_d, &{pname}_device_flag_h, sizeof({tkw}), cudaMemcpyHostToDevice);")
self.print(f"cudaMalloc(&(dp_h->{pname}_device_flag_d), sizeof({tkw}));")
self.print("this->update();")
self.print.add_indent(-4)
self.print("}")
else:
self.print("PairsAccessor(PairsSimulation *ps_): ps(ps_){}")
self.print("")
def destructor(self):
    """Emit the C++ ``~PairsAccessor()`` destructor for GPU targets.

    The generated destructor frees the device flag of every simulation
    property through ``dp_h``, deletes the host-side ``hp`` and ``dp_h``
    objects, and frees ``dp_d``. Nothing is emitted for non-GPU targets.
    """
    if not self.target.is_gpu():
        return

    self.print(f"{self.host_attr}~PairsAccessor(){{")
    self.print.add_indent(4)

    # The flag pointers are reached through dp_h, so they are released
    # before the `delete dp_h;` statement is emitted.
    for p in self.sim.properties:
        self.print(f"cudaFree(dp_h->{p.name()}_device_flag_d);")

    self.print("delete hp;")
    self.print("delete dp_h;")
    self.print("cudaFree(dp_d);")

    self.print.add_indent(-4)
    self.print("}")
    self.print("")
def ifdef_else(self, ifdef, func1, args1, func2, args2):
self.print.add_indent(4)
self.print(f"#ifdef {ifdef}")
......@@ -235,33 +204,54 @@ class PairsAcessor:
func2(*args2)
self.print("#endif")
self.print.add_indent(-4)
def generate_ref_name(self, prop, device):
    """Return the C++ expression that names the storage backing ``prop``.

    Args:
        prop: A ``Property`` or ``FeatureProperty``; its ``name()`` is used
            to build the expression.
        device: True when the expression is for the device-side code path.

    Returns:
        ``dp_d->{name}_d`` for a ``Property`` and ``{name}_d`` for a
        ``FeatureProperty`` when the target is a GPU and ``device`` is
        true; ``ps->pobj->{name}`` in every other case.

    Raises:
        TypeError: If a device-side name is requested on a GPU target for
            an object that is neither a ``Property`` nor a
            ``FeatureProperty``. Previously this fell through and returned
            ``None``, which would end up verbatim in the generated code.
    """
    pname = prop.name()

    # Host-side access (and every access on non-GPU targets) goes through
    # the PairsObjects instance.
    if not (self.target.is_gpu() and device):
        return f"ps->pobj->{pname}"

    if isinstance(prop, Property):
        return f"dp_d->{pname}_d"

    if isinstance(prop, FeatureProperty):
        return f"{pname}_d"

    raise TypeError(
        f"cannot generate a device reference for '{pname}': "
        f"unsupported property kind {type(prop).__name__}"
    )
def getter_body(self, prop, device=False):
self.print.add_indent(4)
pname = prop.name()
tkw = Types.c_accessor_keyword(self.sim, prop.type())
if self.target.is_gpu() and device:
v = f"{pname}_d"
else:
v = f"ps->pobj->{pname}"
ptr = self.generate_ref_name(prop, device)
if isinstance(prop, Property):
idx = "i"
elif isinstance(prop, FeatureProperty):
fname = prop.feature().name()
idx = f"({prop.feature().nkinds()}*{fname}1 + {fname}2)"
if Types.is_scalar(prop.type()):
self.print(f"return {v}[i];")
self.print(f"return {ptr}[{idx}];")
else:
nelems = Types.number_of_elements(self.sim, prop.type())
return_values = [f"{v}[i*{nelems} + {n}]" for n in range(nelems)]
return_values = [f"{ptr}[{idx}*{nelems} + {n}]" for n in range(nelems)]
self.print(f"return {tkw}(" + ", ".join(rv for rv in return_values) + ");")
self.print.add_indent(-4)
def get_property(self, prop):
pname = prop.name()
tkw = Types.c_accessor_keyword(self.sim, prop.type())
splitname = pname.split('_')
funcname = ''.join(word.capitalize() for word in splitname)
self.print(f"{self.host_device_attr}{tkw} get{funcname}(const size_t i) const{{")
if isinstance(prop, Property):
splitname = pname.split('_')
funcname = ''.join(word.capitalize() for word in splitname)
params = "const size_t i"
elif isinstance(prop, FeatureProperty):
fname = prop.feature().name()
splitname = fname.split('_') + pname.split('_')
funcname = ''.join(word.capitalize() for word in splitname)
params = f"const size_t {fname}1, const size_t {fname}2"
self.print(f"{self.host_device_attr}{tkw} get{funcname}({params}) const{{")
if self.target.is_gpu():
self.ifdef_else("__CUDA_ARCH__", self.getter_body, [prop, True], self.getter_body, [prop, False])
......@@ -271,54 +261,64 @@ class PairsAcessor:
self.print("}")
self.print("")
def setter_body(self, prop, device=False):
self.print.add_indent(4)
pname = prop.name()
tkw = Types.c_accessor_keyword(self.sim, prop.type())
ptr = self.generate_ref_name(prop, device)
if self.target.is_gpu() and device:
v = f"{pname}_d"
else:
v = f"ps->pobj->{pname}"
if isinstance(prop, Property):
idx = "i"
elif isinstance(prop, FeatureProperty):
fname = prop.feature().name()
idx = f"({prop.feature().nkinds()}*{fname}1 + {fname}2)"
if Types.is_scalar(prop.type()):
self.print(f"{v}[i] = value;")
self.print(f"{ptr}[{idx}] = value;")
else:
nelems = Types.number_of_elements(self.sim, prop.type())
set_values = [f"{v}[i*{nelems} + {n}] = value[{n}];" for n in range(nelems)]
for sv in set_values:
self.print(sv)
for n in range(nelems):
self.print(f"{ptr}[{idx}*{nelems} + {n}] = value[{n}];")
if self.target.is_gpu():
flag = f"*{pname}_device_flag_d" if device else f"{pname}_host_flag"
pname = prop.name()
flag = f"*(dp_d->{pname}_device_flag_d)" if device else f"hp->{pname}_host_flag"
self.print(f"{flag} = true;")
self.print.add_indent(-4)
def set_property(self, prop):
pname = prop.name()
tkw = Types.c_accessor_keyword(self.sim, prop.type())
splitname = pname.split('_')
funcname = ''.join(word.capitalize() for word in splitname)
self.print(f"{self.host_device_attr}void set{funcname}(const size_t i, const {tkw} &value){{")
if isinstance(prop, Property):
splitname = pname.split('_')
funcname = ''.join(word.capitalize() for word in splitname)
self.print(f"{self.host_device_attr}void set{funcname}(const size_t i, const {tkw} &value){{")
elif isinstance(prop, FeatureProperty):
fname = prop.feature().name()
splitname = fname.split('_') + pname.split('_')
funcname = ''.join(word.capitalize() for word in splitname)
# Feature properties can only be set from host
self.print(f"{self.host_attr}void set{funcname}(const size_t {fname}1, const size_t {fname}2, const {tkw} &value){{")
if self.target.is_gpu():
self.ifdef_else("__CUDA_ARCH__", self.setter_body, [prop, True], self.setter_body, [prop, False])
if isinstance(prop, Property):
self.ifdef_else("__CUDA_ARCH__", self.setter_body, [prop, True], self.setter_body, [prop, False])
elif isinstance(prop, FeatureProperty):
self.setter_body(prop, False)
else:
self.setter_body(prop, False)
self.print("}")
self.print("")
def sync_mode_enum(self):
self.print("enum SyncMode{")
self.print(" HostAndDevice = 0,")
self.print(" Host,")
def sync_ctx_enum(self):
    """Emit the C++ ``SyncContext`` enum (``Host`` = 0, then ``Device``).

    A blank line is printed after the closing ``};``.
    """
    enum_lines = (
        "enum SyncContext{",
        " Host = 0,",
        " Device",
        "};",
        "",
    )
    for line in enum_lines:
        self.print(line)
def sync_property(self, prop):
pname = prop.name()
......@@ -326,56 +326,43 @@ class PairsAcessor:
splitname = pname.split('_')
funcname = ''.join(word.capitalize() for word in splitname)
self.print(f"{self.host_attr}void sync{funcname}(SyncMode sync_mode = HostAndDevice){{")
self.print(f"{self.host_attr}void sync{funcname}(SyncContext sync_ctx = Host, bool overwrite = false){{")
if self.target.is_gpu():
self.print.add_indent(4)
self.print(f"{pname}_d = ps->pobj->{pname}_d;")
self.print(f"cudaMemcpy(&{pname}_device_flag_h, {pname}_device_flag_d, sizeof(bool), cudaMemcpyDeviceToHost);")
self.print(f"cudaMemcpy(&(hp->{pname}_device_flag_h), dp_h->{pname}_device_flag_d, sizeof(bool), cudaMemcpyDeviceToHost);")
self.print("")
#####################################################################################################################
#####################################################################################################################
# self.print(f"if (({pname}_host_flag && {pname}_device_flag_h) || ")
# self.print.add_indent(4)
# self.print(f"({pname}_host_flag && !ps->pairs_runtime->getPropFlags()->isHostFlagSet({pid})) ||")
# self.print(f"({pname}_device_flag_h && !ps->pairs_runtime->getPropFlags()->isDeviceFlagSet({pid}))){{")
# self.print(f"PAIRS_ERROR(\"OUT OF SYNC! Both host and device versions of {pname} are in a modified state.\\n\");")
# self.print("exit(-1);")
# self.print.add_indent(-4)
# self.print("}")
# self.print("")
self.print(f"if ({pname}_host_flag && {pname}_device_flag_h){{")
self.print(f"if (hp->{pname}_host_flag && hp->{pname}_device_flag_h){{")
self.print(f" PAIRS_ERROR(\"OUT OF SYNC 1! Both host and device versions of {pname} are in a modified state.\\n\");")
self.print(" exit(-1);")
self.print("}")
self.print("")
self.print(f"if ({pname}_host_flag && !ps->pairs_runtime->getPropFlags()->isHostFlagSet({pid})){{")
self.print(f" PAIRS_ERROR(\"OUT OF SYNC 2! Both host and device versions of {pname} are in a modified state.\\n\");")
self.print(" exit(-1);")
self.print(f"else if(sync_ctx==Host && overwrite==false){{")
self.print(f" if (hp->{pname}_host_flag && !ps->pairs_runtime->getPropFlags()->isHostFlagSet({pid})){{")
self.print(f" PAIRS_ERROR(\"OUT OF SYNC 2! Did you forget to sync{funcname}(Host) before calling set{funcname} from host? Use sync{funcname}(Host,true) if you want to overwrite {pname} values in host.\\n\");")
self.print(" exit(-1);")
self.print(" }")
self.print("}")
self.print("")
self.print(f"if ({pname}_device_flag_h && !ps->pairs_runtime->getPropFlags()->isDeviceFlagSet({pid})){{")
self.print(f" PAIRS_ERROR(\"OUT OF SYNC 3! Both host and device versions of {pname} are in a modified state.\\n\");")
self.print(" exit(-1);")
self.print(f"else if(sync_ctx==Device && overwrite==false){{")
self.print(f" if (hp->{pname}_device_flag_h && !ps->pairs_runtime->getPropFlags()->isDeviceFlagSet({pid})){{")
self.print(f" PAIRS_ERROR(\"OUT OF SYNC 3! Did you forget to sync{funcname}(Device) before calling set{funcname} from device? Use sync{funcname}(Device,true) if you want to overwrite {pname} values in device.\\n\");")
self.print(" exit(-1);")
self.print(" }")
self.print("}")
self.print("")
#####################################################################################################################
#####################################################################################################################
self.print(f"if ({pname}_host_flag){{")
self.print(f"if (hp->{pname}_host_flag){{")
self.print(f" ps->pairs_runtime->getPropFlags()->setHostFlag({pid});")
self.print(f" ps->pairs_runtime->getPropFlags()->clearDeviceFlag({pid});")
self.print("}")
self.print(f"else if ({pname}_device_flag_h){{")
self.print(f"else if (hp->{pname}_device_flag_h){{")
self.print(f" ps->pairs_runtime->getPropFlags()->setDeviceFlag({pid});")
self.print(f" ps->pairs_runtime->getPropFlags()->clearHostFlag({pid});")
self.print("}")
......@@ -384,64 +371,87 @@ class PairsAcessor:
nelems = Types.number_of_elements(self.sim, prop.type())
tkw = Types.c_keyword(self.sim, prop.type())
self.print(f"if (sync_mode==Device || sync_mode==HostAndDevice) {{")
self.print(f" if (ps->pairs_runtime->getPropFlags()->isHostFlagSet({pid})) {{")
self.print(f" ps->pairs_runtime->copyPropertyToDevice({pid}, ReadOnly, (((ps->pobj->nlocal + ps->pobj->nghost) * {nelems}) * sizeof({tkw})));")
self.print(" }")
self.print(f"if (sync_ctx==Device) {{")
self.print(f" ps->pairs_runtime->copyPropertyToDevice({pid}, ReadOnly, (((ps->pobj->nlocal + ps->pobj->nghost) * {nelems}) * sizeof({tkw})));")
self.print("}")
self.print("")
self.print(f"if (sync_mode==Host || sync_mode==HostAndDevice) {{")
self.print(f" if (ps->pairs_runtime->getPropFlags()->isDeviceFlagSet({pid})) {{")
self.print(f" ps->pairs_runtime->copyPropertyToHost({pid}, ReadOnly, (((ps->pobj->nlocal + ps->pobj->nghost) * {nelems}) * sizeof({tkw})));")
self.print(" }")
self.print(f"if (sync_ctx==Host) {{")
self.print(f" ps->pairs_runtime->copyPropertyToHost({pid}, ReadOnly, (((ps->pobj->nlocal + ps->pobj->nghost) * {nelems}) * sizeof({tkw})));")
self.print("}")
self.print("")
self.print(f"{pname}_host_flag = false;")
self.print(f"{pname}_device_flag_h = false;")
self.print(f"cudaMemcpy({pname}_device_flag_d, &{pname}_device_flag_h, sizeof(bool), cudaMemcpyHostToDevice);")
self.print(f"hp->{pname}_host_flag = false;")
self.print(f"hp->{pname}_device_flag_h = false;")
self.print(f"cudaMemcpy(dp_h->{pname}_device_flag_d, &(hp->{pname}_device_flag_h), sizeof(bool), cudaMemcpyHostToDevice);")
self.print.add_indent(-4)
self.print("}")
self.print("")
def sync_feature_property(self, fp):
    """Emit the C++ ``sync<Feature><Property>`` method for a feature property.

    The method name is built by capitalizing the underscore-separated words
    of the feature name followed by those of the property name. On GPU
    targets the generated body copies the feature property to the device
    when its host flag is set and ``sync_ctx`` is ``Device``, then resets
    the host flag. On other targets the generated body is empty.

    Args:
        fp: Feature property; ``id()``, ``name()`` and ``feature().name()``
            are read from it.
    """
    prop_id = fp.id()
    prop_name = fp.name()
    feature_name = fp.feature().name()

    words = feature_name.split('_') + prop_name.split('_')
    method_suffix = ''.join(map(str.capitalize, words))

    emit = self.print
    emit(f"{self.host_attr}void sync{method_suffix}(SyncContext sync_ctx = Host){{")

    if self.target.is_gpu():
        host_flag = f"hp->{prop_name}_host_flag"

        emit.add_indent(4)
        emit(f"if ({host_flag} && sync_ctx==Device) {{")
        emit(f" ps->pairs_runtime->copyFeaturePropertyToDevice({prop_id});")
        emit("}")
        emit("")
        # NOTE(review): the emitted code clears the host flag even when
        # sync_ctx is Host and no copy was made - confirm this is intended.
        emit(f"{host_flag} = false;")
        emit.add_indent(-4)

    emit("}")
    emit("")
def utility_funcs(self):
nlocal = "ps->pobj->nlocal"
nlocal_d = "dp_d->nlocal"
nghost = "ps->pobj->nghost"
nghost_d = "dp_d->nghost"
if self.target.is_gpu():
self.print(f"{self.host_device_attr}int size() const {{")
self.print(" #ifdef __CUDA_ARCH__")
self.print(" return *nlocal_d + *nghost_d;")
self.print(" #else")
self.print(" return ps->pobj->nlocal + ps->pobj->nghost;")
self.print(" #endif")
self.print(f" #ifdef __CUDA_ARCH__")
self.print(f" return {nlocal_d} + {nghost_d};")
self.print(f" #else")
self.print(f" return {nlocal} + {nghost};")
self.print(f" #endif")
self.print("}")
self.print("")
else:
self.print("int size() const {return ps->pobj->nlocal + ps->pobj->nghost;}")
self.print(f"int size() const {{return {nlocal} + {nghost};}}")
if self.target.is_gpu():
self.print(f"{self.host_device_attr}int nlocal() const {{")
self.print(" #ifdef __CUDA_ARCH__")
self.print(" return *nlocal_d;")
self.print(" #else")
self.print(" return ps->pobj->nlocal;")
self.print(" #endif")
self.print(f" #ifdef __CUDA_ARCH__")
self.print(f" return {nlocal_d};")
self.print(f" #else")
self.print(f" return {nlocal};")
self.print(f" #endif")
self.print("}")
self.print("")
else:
self.print("int nlocal() const {return ps->pobj->nlocal;}")
self.print(f"int nlocal() const {{return {nlocal};}}")
if self.target.is_gpu():
self.print(f"{self.host_device_attr}int nghost() const {{")
self.print(" #ifdef __CUDA_ARCH__")
self.print(" return *nghost_d;")
self.print(" #else")
self.print(" return ps->pobj->nghost;")
self.print(" #endif")
self.print(f" #ifdef __CUDA_ARCH__")
self.print(f" return {nghost_d};")
self.print(f" #else")
self.print(f" return {nghost};")
self.print(f" #endif")
self.print("}")
self.print("")
else:
self.print("int nghost() const {return ps->pobj->nghost;}")
self.print(f"int nghost() const {{return {nghost};}}")
self.print(f"{self.host_device_attr}int getInvalidIdx(){{return -1;}}")
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment