diff --git a/runtime/devices/cuda.cu b/runtime/devices/cuda.cu
index 9bb443742b3b09cb9769398f4aee080c5b0d132d..687bb76600376008c95e6eda496c763c29b9938d 100644
--- a/runtime/devices/cuda.cu
+++ b/runtime/devices/cuda.cu
@@ -29,6 +29,11 @@ __host__ void device_free(void *ptr) {
     CUDA_ASSERT(cudaFree(ptr));
 }
 
+__host__ void device_synchronize() {
+    CUDA_ASSERT(cudaPeekAtLastError());
+    CUDA_ASSERT(cudaDeviceSynchronize());
+}
+
 __host__ void copy_to_device(const void *h_ptr, void *d_ptr, size_t count) {
     CUDA_ASSERT(cudaMemcpy(d_ptr, h_ptr, count, cudaMemcpyHostToDevice));
 }
diff --git a/runtime/devices/device.hpp b/runtime/devices/device.hpp
index 3ef88ef29d9a46c3e15b5a0a8ec9aca76486dfc5..e48d0af95f9d32fc9318e16a12af0b5192f346ed 100644
--- a/runtime/devices/device.hpp
+++ b/runtime/devices/device.hpp
@@ -14,6 +14,7 @@ void cuda_assert(cudaError_t err, const char *file, int line);
 __host__ void *device_alloc(size_t size);
 __host__ void *device_realloc(void *ptr, size_t size);
 __host__ void device_free(void *ptr);
+__host__ void device_synchronize();
 __host__ void copy_to_device(const void *h_ptr, void *d_ptr, size_t count);
 __host__ void copy_to_host(const void *d_ptr, void *h_ptr, size_t count);
 __host__ void copy_static_symbol_to_device(void *h_ptr, const void *d_ptr, size_t count);
diff --git a/runtime/devices/dummy.cpp b/runtime/devices/dummy.cpp
index de6a8df0d985ec20770dcf352da871acf404fd31..909d50ee7cddce3e3cc8131616e2051f7ea2b859 100644
--- a/runtime/devices/dummy.cpp
+++ b/runtime/devices/dummy.cpp
@@ -5,6 +5,7 @@ namespace pairs {
 void *device_alloc(size_t size) { return nullptr; }
 void *device_realloc(void *ptr, size_t size) { return nullptr; }
 void device_free(void *ptr) {}
+void device_synchronize() {}
 void copy_to_device(void const *h_ptr, void *d_ptr, size_t count) {}
 void copy_to_host(void const *d_ptr, void *h_ptr, size_t count) {}
 void copy_static_symbol_to_device(void *h_ptr, const void *d_ptr, size_t count) {}
diff --git a/runtime/pairs.hpp b/runtime/pairs.hpp
index 7be160e2609e3a8eb5e5f74eb8fda1533c2a2458..f96ec88dbf21eaede0ca92e1c33582c31da79262 100644
--- a/runtime/pairs.hpp
+++ b/runtime/pairs.hpp
@@ -112,6 +112,7 @@ public:
         real_t *recv_buf, const int *recv_offsets, const int *nrecv);
 
     void fillCommunicationArrays(int neighbor_ranks[], int pbc[], real_t subdom[]);
+    void sync() { device_synchronize(); }
 };
 
 template<typename T_ptr>
diff --git a/src/pairs/code_gen/cgen.py b/src/pairs/code_gen/cgen.py
index 9e40f1d21be03578d700428a9cda7ac112253d94..ec4018243e141250350dc7a5b01585209c9f0f98 100644
--- a/src/pairs/code_gen/cgen.py
+++ b/src/pairs/code_gen/cgen.py
@@ -97,34 +97,34 @@ class CGen:
             self.print("}")
 
         else:
-            module_params = ""
+            module_params = "PairsSimulation *pairs"
             for var in module.read_only_variables():
                 type_kw = Types.c_keyword(var.type())
                 decl = f"{type_kw} {var.name()}"
-                module_params += decl if len(module_params) <= 0 else f", {decl}"
+                module_params += f", {decl}"
 
             for var in module.write_variables():
                 type_kw = Types.c_keyword(var.type())
                 decl = f"{type_kw} *{var.name()}"
-                module_params += decl if len(module_params) <= 0 else f", {decl}"
+                module_params += f", {decl}"
 
             for array in module.arrays():
                 type_kw = Types.c_keyword(array.type())
                 decl = f"{type_kw} *{array.name()}"
-                module_params += decl if len(module_params) <= 0 else f", {decl}"
+                module_params += f", {decl}"
 
                 if array in module.host_references():
                     decl = f"{type_kw} *h_{array.name()}"
-                    module_params += decl if len(module_params) <= 0 else f", {decl}"
+                    module_params += f", {decl}"
 
             for prop in module.properties():
                 type_kw = Types.c_keyword(prop.type())
                 decl = f"{type_kw} *{prop.name()}"
-                module_params += decl if len(module_params) <= 0 else f", {decl}"
+                module_params += f", {decl}"
 
                 if prop in module.host_references():
                     decl = f"{type_kw} *h_{prop.name()}"
-                    module_params += decl if len(module_params) <= 0 else f", {decl}"
+                    module_params += f", {decl}"
 
             self.print(f"void {module.name}({module_params}) {{")
 
@@ -396,35 +396,36 @@ class CGen:
             self.print(f"if({nblocks} > 0 && {threads_per_block} > 0) {{")
             self.print.add_indent(4)
             self.print(f"{kernel.name}<<<{nblocks}, {threads_per_block}>>>({kernel_params});")
+            self.print("pairs->sync();")
             self.print.add_indent(-4)
             self.print("}")
 
         if isinstance(ast_node, ModuleCall):
             module = ast_node.module
-            module_params = ""
+            module_params = "pairs"
             device_cond = module.run_on_device and self.target.is_gpu()
 
             for var in module.read_only_variables():
                 decl = var.name()
-                module_params += decl if len(module_params) <= 0 else f", {decl}"
+                module_params += f", {decl}"
 
             for var in module.write_variables():
                 decl = f"rv_{var.name()}.getDevicePointer()" if device_cond and var.device_flag else f"&{var.name()}"
-                module_params += decl if len(module_params) <= 0 else f", {decl}"
+                module_params += f", {decl}"
 
             for array in module.arrays():
                 decl = f"d_{array.name()}" if device_cond else array.name()
                 module_params += decl if len(module_params) <= 0 else f", {decl}"
                 if array in module.host_references():
                     decl = array.name()
-                    module_params += decl if len(module_params) <= 0 else f", {decl}"
+                    module_params += f", {decl}"
 
             for prop in module.properties():
                 decl = f"d_{prop.name()}" if device_cond else prop.name()
-                module_params += decl if len(module_params) <= 0 else f", {decl}"
+                module_params += f", {decl}"
                 if prop in module.host_references():
                     decl = prop.name()
-                    module_params += decl if len(module_params) <= 0 else f", {decl}"
+                    module_params += f", {decl}"
 
             self.print(f"{module.name}({module_params});")