From 8ba950c911573bfa57ad53a55e7c3ed1638c035b Mon Sep 17 00:00:00 2001 From: Behzad Safaei <iwia103h@alex1.nhr.fau.de> Date: Tue, 11 Feb 2025 11:31:14 +0100 Subject: [PATCH] Revise accessor: use pointers to structs for device and host --- src/pairs/code_gen/accessor.py | 452 +++++++++++++++++---------------- 1 file changed, 231 insertions(+), 221 deletions(-) diff --git a/src/pairs/code_gen/accessor.py b/src/pairs/code_gen/accessor.py index 5948b15..41b8bd0 100644 --- a/src/pairs/code_gen/accessor.py +++ b/src/pairs/code_gen/accessor.py @@ -1,4 +1,6 @@ from pairs.ir.types import Types +from pairs.ir.features import FeatureProperty +from pairs.ir.properties import Property class PairsAcessor: def __init__(self, cgen): @@ -12,6 +14,15 @@ class PairsAcessor: def generate(self): self.print("") + if self.target.is_gpu(): + self.print("namespace pairs::internal{") + self.print.add_indent(4) + self.DeviceProps_struct() + self.HostProps_struct() + self.print.add_indent(-4) + self.print("}") + self.print("") + if self.target.is_gpu(): self.host_device_attr = "__host__ __device__ " self.host_attr = "__host__ " @@ -28,9 +39,10 @@ class PairsAcessor: self.print("public:") self.print.add_indent(4) - self.sync_mode_enum() + self.sync_ctx_enum() self.update() self.constructor() + self.destructor() for p in self.sim.properties: if (p.type()==Types.Vector) or (Types.is_scalar(p.type())): @@ -39,8 +51,8 @@ class PairsAcessor: self.sync_property(p) for fp in self.sim.feature_properties: - self.get_feature_property(fp) - self.set_feature_property(fp) + self.get_property(fp) + self.set_property(fp) self.sync_feature_property(fp) self.utility_funcs() @@ -49,184 +61,141 @@ class PairsAcessor: self.print("};") self.print("") - def get_fp_body(self, fp, device=False): + def DeviceProps_struct(self): + self.print("struct DeviceProps{") self.print.add_indent(4) - fp_name = fp.name() - f_name = fp.feature().name() - - tkw = Types.c_accessor_keyword(self.sim, fp.type()) - - if self.target.is_gpu() and device: - v = f"{fp_name}_d" - else: - v = f"ps->pobj->{fp_name}" - - idx = f"{fp.feature().nkinds()}*{f_name}1 + {f_name}2" - if Types.is_scalar(fp.type()): - self.print(f"return {v}[{idx}];") - else: - nelems = Types.number_of_elements(self.sim, fp.type()) - return_values = [f"{v}[({idx})*{nelems} + {n}]" for n in range(nelems)] - self.print(f"return {tkw}(" + ", ".join(rv for rv in return_values) + ");") - self.print.add_indent(-4) - - def get_feature_property(self, fp): - fp_name = fp.name() - tkw = Types.c_accessor_keyword(self.sim, fp.type()) - f_name = fp.feature().name() - - splitname = f_name.split('_') + fp_name.split('_') - funcname = ''.join(word.capitalize() for word in splitname) - self.print(f"{self.host_device_attr}{tkw} get{funcname}(const size_t {f_name}1, const size_t {f_name}2) const{{") - - if self.target.is_gpu(): - self.ifdef_else("__CUDA_ARCH__", self.get_fp_body, [fp, True], self.get_fp_body, [fp, False]) - else: - self.get_fp_body(fp, False) - - self.print("}") + self.print("int nlocal;") + self.print("int nghost;") self.print("") - - def set_fp_body(self, fp, device=False): - self.print.add_indent(4) - fp_name = fp.name() - f_name = fp.feature().name() - tkw = Types.c_accessor_keyword(self.sim, fp.type()) - if self.target.is_gpu() and device: - v = f"{fp_name}_d" - else: - v = f"ps->pobj->{fp_name}" + self.print("//Property device pointers") + for p in self.sim.properties: + pname = p.name() + tkw = Types.c_keyword(self.sim, p.type()) + self.print(f"{tkw} *{pname}_d;") - idx = f"{fp.feature().nkinds()}*{f_name}1 + {f_name}2" + self.print("") + self.print("//Property device flag pointers") + for p in self.sim.properties: + pname = p.name() + tkw = Types.c_keyword(self.sim, Types.Boolean) + self.print(f"{tkw} *{pname}_device_flag_d;") - if Types.is_scalar(fp.type()): - self.print(f"{v}[{idx}] = value;") - else: - nelems = Types.number_of_elements(self.sim, fp.type()) - set_values = [f"{v}[({idx})*{nelems} + {n}] = value[{n}];" for n in range(nelems)] - for sv in set_values: - self.print(sv) + self.print("") + self.print("//Feature properties on device are global") - if self.target.is_gpu(): - flag = f"*{fp_name}_device_flag_d" if device else f"{fp_name}_host_flag" - self.print(f"{flag} = true;") + self.print("") + self.print("//Feature properties have no flags on device since they can't be modified on device") self.print.add_indent(-4) + self.print("};") + self.print("") + def HostProps_struct(self): + self.print("// HostProps only contains property flags, since properties themselves can be directly accessed through ps->pobj") + self.print("// TODO: Move properties out of PairsObjects into DeviceProps and HostProps, so that all 3 structs have mutually exclusive members") + self.print("struct HostProps{") + self.print.add_indent(4) - def set_feature_property(self, fp): - fp_name = fp.name() - tkw = Types.c_accessor_keyword(self.sim, fp.type()) - f_name = fp.feature().name() + self.print("") + self.print("//Property host pointers are in PairsObjects") - splitname = f_name.split('_') + fp_name.split('_') - funcname = ''.join(word.capitalize() for word in splitname) + self.print("") + self.print("//Property host flags") + for p in self.sim.properties: + pname = p.name() + tkw = Types.c_keyword(self.sim, Types.Boolean) + self.print(f"{tkw} {pname}_host_flag = false;") - # Feature properties can only be set from host - self.print(f"{self.host_attr}void set{funcname}(const size_t {f_name}1, const size_t {f_name}2, const {tkw} &value){{") - self.set_fp_body(fp, False) - - self.print("}") self.print("") + self.print("//Property device flags") + for p in self.sim.properties: + pname = p.name() + tkw = Types.c_keyword(self.sim, Types.Boolean) + self.print(f"{tkw} {pname}_device_flag_h = false;") - def sync_feature_property(self, fp): - fp_id = fp.id() - fp_name = fp.name() - f_name = fp.feature().name() - splitname = f_name.split('_') + fp_name.split('_') - funcname = ''.join(word.capitalize() for word in splitname) - - self.print(f"{self.host_attr}void sync{funcname}(SyncMode sync_mode = HostAndDevice){{") - - if self.target.is_gpu(): - self.print.add_indent(4) - self.print(f"if ({fp_name}_host_flag) {{") - self.print(f" ps->pairs_runtime->copyFeaturePropertyToDevice({fp_id});") - self.print("}") - self.print("") - - self.print(f"{fp_name}_host_flag = false;") - self.print.add_indent(-4) + self.print("") + self.print("//Feature property host pointers are in PairsObjects") - self.print("}") self.print("") + self.print("//Feature property host flags") + for fp in self.sim.feature_properties: + fpname = fp.name() + tkw = Types.c_keyword(self.sim, Types.Boolean) + self.print(f"{tkw} {fpname}_host_flag = false;") + + self.print("") + self.print("//Feature properties have no device flags") + self.print.add_indent(-4) + self.print("};") + self.print("") def member_variables(self): self.print("PairsSimulation *ps;") - if self.target.is_gpu(): - self.print("int *nlocal_d;") - self.print("int *nghost_d;") - self.print("") - - self.print("//Properties") - for p in self.sim.properties: - pname = p.name() - tkw = Types.c_keyword(self.sim, p.type()) - self.print(f"{tkw} *{pname}_d;") - - self.print("") - self.print("//Property flags") - for p in self.sim.properties: - pname = p.name() - tkw = Types.c_keyword(self.sim, Types.Boolean) - self.print(f"{tkw} *{pname}_device_flag_d;") - self.print(f"{tkw} {pname}_device_flag_h = false;") - self.print(f"{tkw} {pname}_host_flag = false;") - - self.print("") - self.print("//Feature properties are global") - - self.print("") - self.print("//Feature property flags") - for fp in self.sim.feature_properties: - fpname = fp.name() - tkw = Types.c_keyword(self.sim, Types.Boolean) - self.print(f"{tkw} {fpname}_host_flag = false;") - - self.print("") + self.print("pairs::internal::HostProps *hp;") + self.print("pairs::internal::DeviceProps *dp_h;") + self.print("pairs::internal::DeviceProps *dp_d;") def update(self): self.print(f"{self.host_attr}void update(){{") if self.target.is_gpu(): self.print.add_indent(4) - self.print(f"cudaMemcpy(nlocal_d, &(ps->pobj->nlocal), sizeof(int), cudaMemcpyHostToDevice);") - self.print(f"cudaMemcpy(nghost_d, &(ps->pobj->nghost), sizeof(int), cudaMemcpyHostToDevice);") + self.print(f"dp_h->nlocal = ps->pobj->nlocal;") + self.print(f"dp_h->nghost = ps->pobj->nghost;") for p in self.sim.properties: pname = p.name() - self.print(f"{pname}_d = ps->pobj->{pname}_d;") + self.print(f"dp_h->{pname}_d = ps->pobj->{pname}_d;") + self.print(f"cudaMemcpy(dp_d, dp_h, sizeof(pairs::internal::DeviceProps), cudaMemcpyHostToDevice);") self.print.add_indent(-4) self.print("}") self.print("") - def constructor(self): if self.target.is_gpu(): self.print(f"{self.host_attr}PairsAccessor(PairsSimulation *ps_): ps(ps_){{") self.print.add_indent(4) - self.print(f"cudaMalloc(&nlocal_d, sizeof(int));") - self.print(f"cudaMalloc(&nghost_d, sizeof(int));") - self.print("this->update();") + self.print(f"hp = new pairs::internal::HostProps;") + self.print(f"dp_h = new pairs::internal::DeviceProps;") + self.print(f"cudaMalloc(&dp_d, sizeof(pairs::internal::DeviceProps));") for p in self.sim.properties: pname = p.name() tkw = Types.c_keyword(self.sim, Types.Boolean) - self.print(f"cudaMalloc(&{pname}_device_flag_d, sizeof({tkw}));") - self.print(f"cudaMemcpy({pname}_device_flag_d, &{pname}_device_flag_h, sizeof({tkw}), cudaMemcpyHostToDevice);") + self.print(f"cudaMalloc(&(dp_h->{pname}_device_flag_d), sizeof({tkw}));") + self.print("this->update();") self.print.add_indent(-4) self.print("}") + else: self.print("PairsAccessor(PairsSimulation *ps_): ps(ps_){}") self.print("") - + + def destructor(self): + if self.target.is_gpu(): + self.print(f"{self.host_attr}~PairsAccessor(){{") + self.print.add_indent(4) + + for p in self.sim.properties: + pname = p.name() + tkw = Types.c_keyword(self.sim, Types.Boolean) + self.print(f"cudaFree(dp_h->{pname}_device_flag_d);") + + self.print(f"delete hp;") + self.print(f"delete dp_h;") + self.print(f"cudaFree(dp_d);") + + self.print.add_indent(-4) + self.print("}") + self.print("") + def ifdef_else(self, ifdef, func1, args1, func2, args2): self.print.add_indent(4) self.print(f"#ifdef {ifdef}") @@ -235,33 +204,54 @@ class PairsAcessor: func2(*args2) self.print("#endif") self.print.add_indent(-4) + + def generate_ref_name(self, prop, device): + pname = prop.name() + + if self.target.is_gpu() and device: + if isinstance(prop, Property): + return f"dp_d->{pname}_d" + + elif isinstance(prop, FeatureProperty): + return f"{pname}_d" + else: + return f"ps->pobj->{pname}" def getter_body(self, prop, device=False): self.print.add_indent(4) - pname = prop.name() tkw = Types.c_accessor_keyword(self.sim, prop.type()) - - if self.target.is_gpu() and device: - v = f"{pname}_d" - else: - v = f"ps->pobj->{pname}" + ptr = self.generate_ref_name(prop, device) + + if isinstance(prop, Property): + idx = "i" + elif isinstance(prop, FeatureProperty): + fname = prop.feature().name() + idx = f"({prop.feature().nkinds()}*{fname}1 + {fname}2)" if Types.is_scalar(prop.type()): - self.print(f"return {v}[i];") + self.print(f"return {ptr}[{idx}];") else: nelems = Types.number_of_elements(self.sim, prop.type()) - return_values = [f"{v}[i*{nelems} + {n}]" for n in range(nelems)] + return_values = [f"{ptr}[{idx}*{nelems} + {n}]" for n in range(nelems)] self.print(f"return {tkw}(" + ", ".join(rv for rv in return_values) + ");") self.print.add_indent(-4) - def get_property(self, prop): pname = prop.name() tkw = Types.c_accessor_keyword(self.sim, prop.type()) - splitname = pname.split('_') - funcname = ''.join(word.capitalize() for word in splitname) - self.print(f"{self.host_device_attr}{tkw} get{funcname}(const size_t i) const{{") + if isinstance(prop, Property): + splitname = pname.split('_') + funcname = ''.join(word.capitalize() for word in splitname) + params = "const size_t i" + + elif isinstance(prop, FeatureProperty): + fname = prop.feature().name() + splitname = fname.split('_') + pname.split('_') + funcname = ''.join(word.capitalize() for word in splitname) + params = f"const size_t {fname}1, const size_t {fname}2" + + self.print(f"{self.host_device_attr}{tkw} get{funcname}({params}) const{{") if self.target.is_gpu(): self.ifdef_else("__CUDA_ARCH__", self.getter_body, [prop, True], self.getter_body, [prop, False]) @@ -271,54 +261,64 @@ class PairsAcessor: self.print("}") self.print("") - def setter_body(self, prop, device=False): self.print.add_indent(4) - pname = prop.name() - tkw = Types.c_accessor_keyword(self.sim, prop.type()) + ptr = self.generate_ref_name(prop, device) - if self.target.is_gpu() and device: - v = f"{pname}_d" - else: - v = f"ps->pobj->{pname}" + if isinstance(prop, Property): + idx = "i" + elif isinstance(prop, FeatureProperty): + fname = prop.feature().name() + idx = f"({prop.feature().nkinds()}*{fname}1 + {fname}2)" if Types.is_scalar(prop.type()): - self.print(f"{v}[i] = value;") + self.print(f"{ptr}[{idx}] = value;") else: nelems = Types.number_of_elements(self.sim, prop.type()) - set_values = [f"{v}[i*{nelems} + {n}] = value[{n}];" for n in range(nelems)] - for sv in set_values: - self.print(sv) + for n in range(nelems): + self.print(f"{ptr}[{idx}*{nelems} + {n}] = value[{n}];") if self.target.is_gpu(): - flag = f"*{pname}_device_flag_d" if device else f"{pname}_host_flag" + pname = prop.name() + flag = f"*(dp_d->{pname}_device_flag_d)" if device else f"hp->{pname}_host_flag" self.print(f"{flag} = true;") self.print.add_indent(-4) - def set_property(self, prop): pname = prop.name() tkw = Types.c_accessor_keyword(self.sim, prop.type()) - splitname = pname.split('_') - funcname = ''.join(word.capitalize() for word in splitname) - self.print(f"{self.host_device_attr}void set{funcname}(const size_t i, const {tkw} &value){{") + if isinstance(prop, Property): + splitname = pname.split('_') + funcname = ''.join(word.capitalize() for word in splitname) + self.print(f"{self.host_device_attr}void set{funcname}(const size_t i, const {tkw} &value){{") + + elif isinstance(prop, FeatureProperty): + fname = prop.feature().name() + splitname = fname.split('_') + pname.split('_') + funcname = ''.join(word.capitalize() for word in splitname) + # Feature properties can only be set from host + self.print(f"{self.host_attr}void set{funcname}(const size_t {fname}1, const size_t {fname}2, const {tkw} &value){{") if self.target.is_gpu(): - self.ifdef_else("__CUDA_ARCH__", self.setter_body, [prop, True], self.setter_body, [prop, False]) + if isinstance(prop, Property): + self.ifdef_else("__CUDA_ARCH__", self.setter_body, [prop, True], self.setter_body, [prop, False]) + + elif isinstance(prop, FeatureProperty): + self.setter_body(prop, False) else: self.setter_body(prop, False) self.print("}") self.print("") - def sync_mode_enum(self): - self.print("enum SyncMode{") - self.print(" HostAndDevice = 0,") - self.print(" Host,") + def sync_ctx_enum(self): + self.print("enum SyncContext{") + self.print(" Host = 0,") self.print(" Device") self.print("};") + self.print("") def sync_property(self, prop): pname = prop.name() @@ -326,56 +326,43 @@ class PairsAcessor: splitname = pname.split('_') funcname = ''.join(word.capitalize() for word in splitname) - self.print(f"{self.host_attr}void sync{funcname}(SyncMode sync_mode = HostAndDevice){{") + self.print(f"{self.host_attr}void sync{funcname}(SyncContext sync_ctx = Host, bool overwrite = false){{") if self.target.is_gpu(): self.print.add_indent(4) - self.print(f"{pname}_d = ps->pobj->{pname}_d;") - self.print(f"cudaMemcpy(&{pname}_device_flag_h, {pname}_device_flag_d, sizeof(bool), cudaMemcpyDeviceToHost);") + self.print(f"cudaMemcpy(&(hp->{pname}_device_flag_h), dp_h->{pname}_device_flag_d, sizeof(bool), cudaMemcpyDeviceToHost);") self.print("") - ##################################################################################################################### ##################################################################################################################### - # self.print(f"if (({pname}_host_flag && {pname}_device_flag_h) || ") - # self.print.add_indent(4) - # self.print(f"({pname}_host_flag && !ps->pairs_runtime->getPropFlags()->isHostFlagSet({pid})) ||") - # self.print(f"({pname}_device_flag_h && !ps->pairs_runtime->getPropFlags()->isDeviceFlagSet({pid}))){{") - # self.print(f"PAIRS_ERROR(\"OUT OF SYNC! Both host and device versions of {pname} are in a modified state.\\n\");") - # self.print("exit(-1);") - # self.print.add_indent(-4) - # self.print("}") - # self.print("") - - - self.print(f"if ({pname}_host_flag && {pname}_device_flag_h){{") + + self.print(f"if (hp->{pname}_host_flag && hp->{pname}_device_flag_h){{") self.print(f" PAIRS_ERROR(\"OUT OF SYNC 1! Both host and device versions of {pname} are in a modified state.\\n\");") self.print(" exit(-1);") self.print("}") - self.print("") - - self.print(f"if ({pname}_host_flag && !ps->pairs_runtime->getPropFlags()->isHostFlagSet({pid})){{") - self.print(f" PAIRS_ERROR(\"OUT OF SYNC 2! Both host and device versions of {pname} are in a modified state.\\n\");") - self.print(" exit(-1);") + self.print(f"else if(sync_ctx==Host && overwrite==false){{") + self.print(f" if (hp->{pname}_host_flag && !ps->pairs_runtime->getPropFlags()->isHostFlagSet({pid})){{") + self.print(f" PAIRS_ERROR(\"OUT OF SYNC 2! Did you forget to sync{funcname}(Host) before calling set{funcname} from host? Use sync{funcname}(Host,true) if you want to overwrite {pname} values in host.\\n\");") + self.print(" exit(-1);") + self.print(" }") self.print("}") - self.print("") - - self.print(f"if ({pname}_device_flag_h && !ps->pairs_runtime->getPropFlags()->isDeviceFlagSet({pid})){{") - self.print(f" PAIRS_ERROR(\"OUT OF SYNC 3! Both host and device versions of {pname} are in a modified state.\\n\");") - self.print(" exit(-1);") + self.print(f"else if(sync_ctx==Device && overwrite==false){{") + self.print(f" if (hp->{pname}_device_flag_h && !ps->pairs_runtime->getPropFlags()->isDeviceFlagSet({pid})){{") + self.print(f" PAIRS_ERROR(\"OUT OF SYNC 3! Did you forget to sync{funcname}(Device) before calling set{funcname} from device? Use sync{funcname}(Device,true) if you want to overwrite {pname} values in device.\\n\");") + self.print(" exit(-1);") + self.print(" }") self.print("}") self.print("") ##################################################################################################################### ##################################################################################################################### - - self.print(f"if ({pname}_host_flag){{") + self.print(f"if (hp->{pname}_host_flag){{") self.print(f" ps->pairs_runtime->getPropFlags()->setHostFlag({pid});") self.print(f" ps->pairs_runtime->getPropFlags()->clearDeviceFlag({pid});") self.print("}") - self.print(f"else if ({pname}_device_flag_h){{") + self.print(f"else if (hp->{pname}_device_flag_h){{") self.print(f" ps->pairs_runtime->getPropFlags()->setDeviceFlag({pid});") self.print(f" ps->pairs_runtime->getPropFlags()->clearHostFlag({pid});") self.print("}") @@ -384,64 +371,87 @@ class PairsAcessor: nelems = Types.number_of_elements(self.sim, prop.type()) tkw = Types.c_keyword(self.sim, prop.type()) - self.print(f"if (sync_mode==Device || sync_mode==HostAndDevice) {{") - self.print(f" if (ps->pairs_runtime->getPropFlags()->isHostFlagSet({pid})) {{") - self.print(f" ps->pairs_runtime->copyPropertyToDevice({pid}, ReadOnly, (((ps->pobj->nlocal + ps->pobj->nghost) * {nelems}) * sizeof({tkw})));") - self.print(" }") + self.print(f"if (sync_ctx==Device) {{") + self.print(f" ps->pairs_runtime->copyPropertyToDevice({pid}, ReadOnly, (((ps->pobj->nlocal + ps->pobj->nghost) * {nelems}) * sizeof({tkw})));") self.print("}") self.print("") - self.print(f"if (sync_mode==Host || sync_mode==HostAndDevice) {{") - self.print(f" if (ps->pairs_runtime->getPropFlags()->isDeviceFlagSet({pid})) {{") - self.print(f" ps->pairs_runtime->copyPropertyToHost({pid}, ReadOnly, (((ps->pobj->nlocal + ps->pobj->nghost) * {nelems}) * sizeof({tkw})));") - self.print(" }") + self.print(f"if (sync_ctx==Host) {{") + self.print(f" ps->pairs_runtime->copyPropertyToHost({pid}, ReadOnly, (((ps->pobj->nlocal + ps->pobj->nghost) * {nelems}) * sizeof({tkw})));") self.print("}") self.print("") - self.print(f"{pname}_host_flag = false;") - self.print(f"{pname}_device_flag_h = false;") - self.print(f"cudaMemcpy({pname}_device_flag_d, &{pname}_device_flag_h, sizeof(bool), cudaMemcpyHostToDevice);") + self.print(f"hp->{pname}_host_flag = false;") + self.print(f"hp->{pname}_device_flag_h = false;") + self.print(f"cudaMemcpy(dp_h->{pname}_device_flag_d, &(hp->{pname}_device_flag_h), sizeof(bool), cudaMemcpyHostToDevice);") self.print.add_indent(-4) self.print("}") self.print("") + + def sync_feature_property(self, fp): + fp_id = fp.id() + fp_name = fp.name() + f_name = fp.feature().name() + splitname = f_name.split('_') + fp_name.split('_') + funcname = ''.join(word.capitalize() for word in splitname) + + self.print(f"{self.host_attr}void sync{funcname}(SyncContext sync_ctx = Host){{") + + if self.target.is_gpu(): + self.print.add_indent(4) + self.print(f"if (hp->{fp_name}_host_flag && sync_ctx==Device) {{") + self.print(f" ps->pairs_runtime->copyFeaturePropertyToDevice({fp_id});") + self.print("}") + self.print("") + + self.print(f"hp->{fp_name}_host_flag = false;") + self.print.add_indent(-4) + + self.print("}") + self.print("") def utility_funcs(self): + nlocal = "ps->pobj->nlocal" + nlocal_d = "dp_d->nlocal" + nghost = "ps->pobj->nghost" + nghost_d = "dp_d->nghost" + if self.target.is_gpu(): self.print(f"{self.host_device_attr}int size() const {{") - self.print(" #ifdef __CUDA_ARCH__") - self.print(" return *nlocal_d + *nghost_d;") - self.print(" #else") - self.print(" return ps->pobj->nlocal + ps->pobj->nghost;") - self.print(" #endif") + self.print(f" #ifdef __CUDA_ARCH__") + self.print(f" return {nlocal_d} + {nghost_d};") + self.print(f" #else") + self.print(f" return {nlocal} + {nghost};") + self.print(f" #endif") self.print("}") self.print("") else: - self.print("int size() const {return ps->pobj->nlocal + ps->pobj->nghost;}") + self.print(f"int size() const {{return {nlocal} + {nghost};}}") if self.target.is_gpu(): self.print(f"{self.host_device_attr}int nlocal() const {{") - self.print(" #ifdef __CUDA_ARCH__") - self.print(" return *nlocal_d;") - self.print(" #else") - self.print(" return ps->pobj->nlocal;") - self.print(" #endif") + self.print(f" #ifdef __CUDA_ARCH__") + self.print(f" return {nlocal_d};") + self.print(f" #else") + self.print(f" return {nlocal};") + self.print(f" #endif") self.print("}") self.print("") else: - self.print("int nlocal() const {return ps->pobj->nlocal;}") + self.print(f"int nlocal() const {{return {nlocal};}}") if self.target.is_gpu(): self.print(f"{self.host_device_attr}int nghost() const {{") - self.print(" #ifdef __CUDA_ARCH__") - self.print(" return *nghost_d;") - self.print(" #else") - self.print(" return ps->pobj->nghost;") - self.print(" #endif") + self.print(f" #ifdef __CUDA_ARCH__") + self.print(f" return {nghost_d};") + self.print(f" #else") + self.print(f" return {nghost};") + self.print(f" #endif") self.print("}") self.print("") else: - self.print("int nghost() const {return ps->pobj->nghost;}") + self.print(f"int nghost() const {{return {nghost};}}") self.print(f"{self.host_device_attr}int getInvalidIdx(){{return -1;}}") -- GitLab