diff --git a/runtime/devices/cuda.cu b/runtime/devices/cuda.cu index 9bb443742b3b09cb9769398f4aee080c5b0d132d..687bb76600376008c95e6eda496c763c29b9938d 100644 --- a/runtime/devices/cuda.cu +++ b/runtime/devices/cuda.cu @@ -29,6 +29,11 @@ __host__ void device_free(void *ptr) { CUDA_ASSERT(cudaFree(ptr)); } +__host__ void device_synchronize() { + CUDA_ASSERT(cudaPeekAtLastError()); + CUDA_ASSERT(cudaDeviceSynchronize()); +} + __host__ void copy_to_device(const void *h_ptr, void *d_ptr, size_t count) { CUDA_ASSERT(cudaMemcpy(d_ptr, h_ptr, count, cudaMemcpyHostToDevice)); } diff --git a/runtime/devices/device.hpp b/runtime/devices/device.hpp index 3ef88ef29d9a46c3e15b5a0a8ec9aca76486dfc5..e48d0af95f9d32fc9318e16a12af0b5192f346ed 100644 --- a/runtime/devices/device.hpp +++ b/runtime/devices/device.hpp @@ -14,6 +14,7 @@ void cuda_assert(cudaError_t err, const char *file, int line); __host__ void *device_alloc(size_t size); __host__ void *device_realloc(void *ptr, size_t size); __host__ void device_free(void *ptr); +__host__ void device_synchronize(); __host__ void copy_to_device(const void *h_ptr, void *d_ptr, size_t count); __host__ void copy_to_host(const void *d_ptr, void *h_ptr, size_t count); __host__ void copy_static_symbol_to_device(void *h_ptr, const void *d_ptr, size_t count); diff --git a/runtime/devices/dummy.cpp b/runtime/devices/dummy.cpp index de6a8df0d985ec20770dcf352da871acf404fd31..909d50ee7cddce3e3cc8131616e2051f7ea2b859 100644 --- a/runtime/devices/dummy.cpp +++ b/runtime/devices/dummy.cpp @@ -5,6 +5,7 @@ namespace pairs { void *device_alloc(size_t size) { return nullptr; } void *device_realloc(void *ptr, size_t size) { return nullptr; } void device_free(void *ptr) {} +void device_synchronize() {} void copy_to_device(void const *h_ptr, void *d_ptr, size_t count) {} void copy_to_host(void const *d_ptr, void *h_ptr, size_t count) {} void copy_static_symbol_to_device(void *h_ptr, const void *d_ptr, size_t count) {} diff --git a/runtime/pairs.hpp b/runtime/pairs.hpp index 7be160e2609e3a8eb5e5f74eb8fda1533c2a2458..f96ec88dbf21eaede0ca92e1c33582c31da79262 100644 --- a/runtime/pairs.hpp +++ b/runtime/pairs.hpp @@ -112,6 +112,7 @@ public: real_t *recv_buf, const int *recv_offsets, const int *nrecv); void fillCommunicationArrays(int neighbor_ranks[], int pbc[], real_t subdom[]); + void sync() { device_synchronize(); } }; template<typename T_ptr> diff --git a/src/pairs/code_gen/cgen.py b/src/pairs/code_gen/cgen.py index 9e40f1d21be03578d700428a9cda7ac112253d94..ec4018243e141250350dc7a5b01585209c9f0f98 100644 --- a/src/pairs/code_gen/cgen.py +++ b/src/pairs/code_gen/cgen.py @@ -97,34 +97,34 @@ class CGen: self.print("}") else: - module_params = "" + module_params = "PairsSimulation *pairs" for var in module.read_only_variables(): type_kw = Types.c_keyword(var.type()) decl = f"{type_kw} {var.name()}" - module_params += decl if len(module_params) <= 0 else f", {decl}" + module_params += f", {decl}" for var in module.write_variables(): type_kw = Types.c_keyword(var.type()) decl = f"{type_kw} *{var.name()}" - module_params += decl if len(module_params) <= 0 else f", {decl}" + module_params += f", {decl}" for array in module.arrays(): type_kw = Types.c_keyword(array.type()) decl = f"{type_kw} *{array.name()}" - module_params += decl if len(module_params) <= 0 else f", {decl}" + module_params += f", {decl}" if array in module.host_references(): decl = f"{type_kw} *h_{array.name()}" - module_params += decl if len(module_params) <= 0 else f", {decl}" + module_params += f", {decl}" for prop in module.properties(): type_kw = Types.c_keyword(prop.type()) decl = f"{type_kw} *{prop.name()}" - module_params += decl if len(module_params) <= 0 else f", {decl}" + module_params += f", {decl}" if prop in module.host_references(): decl = f"{type_kw} *h_{prop.name()}" - module_params += decl if len(module_params) <= 0 else f", {decl}" + module_params += f", {decl}" self.print(f"void {module.name}({module_params}) {{") @@ -396,35 +396,36 @@ class CGen: self.print(f"if({nblocks} > 0 && {threads_per_block} > 0) {{") self.print.add_indent(4) self.print(f"{kernel.name}<<<{nblocks}, {threads_per_block}>>>({kernel_params});") + self.print("pairs->sync();") self.print.add_indent(-4) self.print("}") if isinstance(ast_node, ModuleCall): module = ast_node.module - module_params = "" + module_params = "pairs" device_cond = module.run_on_device and self.target.is_gpu() for var in module.read_only_variables(): decl = var.name() - module_params += decl if len(module_params) <= 0 else f", {decl}" + module_params += f", {decl}" for var in module.write_variables(): decl = f"rv_{var.name()}.getDevicePointer()" if device_cond and var.device_flag else f"&{var.name()}" - module_params += decl if len(module_params) <= 0 else f", {decl}" + module_params += f", {decl}" for array in module.arrays(): decl = f"d_{array.name()}" if device_cond else array.name() module_params += decl if len(module_params) <= 0 else f", {decl}" if array in module.host_references(): decl = array.name() - module_params += decl if len(module_params) <= 0 else f", {decl}" + module_params += f", {decl}" for prop in module.properties(): decl = f"d_{prop.name()}" if device_cond else prop.name() - module_params += decl if len(module_params) <= 0 else f", {decl}" + module_params += f", {decl}" if prop in module.host_references(): decl = prop.name() - module_params += decl if len(module_params) <= 0 else f", {decl}" + module_params += f", {decl}" self.print(f"{module.name}({module_params});")