diff --git a/src/lbmpy/advanced_streaming/communication.py b/src/lbmpy/advanced_streaming/communication.py
index 1d03e9f9907aa8b6852e4f1e269ec08ab3e047f8..c02b6e2229d3f1a3da0e81cfdd70a6e5c5ff5e87 100644
--- a/src/lbmpy/advanced_streaming/communication.py
+++ b/src/lbmpy/advanced_streaming/communication.py
@@ -186,7 +186,6 @@ def get_communication_slices(
 def periodic_pdf_gpu_copy_kernel(pdf_field, src_slice, dst_slice, domain_size=None):
     """Generate a GPU kernel which copies all values from one slice of a field
     to another non-overlapping slice."""
-    # from pystencils.gpu.kernelcreation import create_cuda_kernel
     from pystencils import create_kernel
 
     pdf_idx = src_slice[-1]
@@ -206,7 +205,7 @@ def periodic_pdf_gpu_copy_kernel(pdf_field, src_slice, dst_slice, domain_size=No
         return s.start if isinstance(s, slice) else s
 
     def _stop(s):
-        return s.stop if isinstance(s, slice) else s + 1
+        return s.stop if isinstance(s, slice) else s
 
     offset = [
         _start(s1) - _start(s2)
@@ -223,7 +222,9 @@ def periodic_pdf_gpu_copy_kernel(pdf_field, src_slice, dst_slice, domain_size=No
         ]
     )
     config = CreateKernelConfig(
-        iteration_slice=dst_slice, skip_independence_check=True, target=Target.GPU
+        iteration_slice=dst_slice,
+        skip_independence_check=True,
+        target=Target.GPU,
     )
 
     ast = create_kernel(copy_eq, config=config)
diff --git a/src/lbmpy/moment_transforms/rawmomenttransforms.py b/src/lbmpy/moment_transforms/rawmomenttransforms.py
index 11013841d0cda6cec0f57bdf5da40cbfd2fc302d..63952347c1075aab1bdfe473ecd92679d466f986 100644
--- a/src/lbmpy/moment_transforms/rawmomenttransforms.py
+++ b/src/lbmpy/moment_transforms/rawmomenttransforms.py
@@ -172,7 +172,7 @@ class PdfsToMomentsByMatrixTransform(AbstractRawMomentTransform):
 
     #   ----------------------------- Private Members -----------------------------
 
-    @ property
+    @property
     def _default_simplification(self):
         forward_simp = SimplificationStrategy()
         # forward_simp.add(substitute_moments_in_conserved_quantity_equations)
@@ -218,7 +218,7 @@ class PdfsToMomentsByChimeraTransform(AbstractRawMomentTransform):
                                                                                 self.moment_polynomials)
         self.poly_to_mono_matrix = self.mono_to_poly_matrix.inv()
 
-    @ property
+    @property
     def absorbs_conserved_quantity_equations(self):
         return True
 
@@ -414,7 +414,7 @@ class PdfsToMomentsByChimeraTransform(AbstractRawMomentTransform):
 
     #   ----------------------------- Private Members -----------------------------
 
-    @ property
+    @property
     def _default_simplification(self):
         from lbmpy.methods.momentbased.momentbasedsimplifications import (
             substitute_moments_in_conserved_quantity_equations,
diff --git a/tests/advanced_streaming/test_communication.py b/tests/advanced_streaming/test_communication.py
index a63fbbe486391f699d114ec3901ed61573b48922..9328e8963e42dac8b681647e5f72d575d20049ef 100644
--- a/tests/advanced_streaming/test_communication.py
+++ b/tests/advanced_streaming/test_communication.py
@@ -9,6 +9,7 @@ from lbmpy.advanced_streaming.communication import (
     get_communication_slices,
     _fix_length_one_slices,
     LBMPeriodicityHandling,
+    periodic_pdf_gpu_copy_kernel,
 )
 from lbmpy.advanced_streaming.utility import streaming_patterns, Timestep
 from lbmpy.enums import Stencil
@@ -87,6 +88,47 @@ def test_pull_communication_slices(stencil):
         assert dst == gl_slice
 
 
+@pytest.mark.parametrize("direction", LBStencil(Stencil.D3Q27).stencil_entries)
+@pytest.mark.parametrize("pull", [False, True])  # run each direction with both slice role assignments
+def test_gpu_comm_kernels(direction: tuple, pull: bool):  # GPU copy kernel must carry src_slice values into dst_slice
+    pytest.importorskip("cupy")  # skip instead of failing when cupy cannot be imported
+
+    stencil = LBStencil(Stencil.D3Q27)
+    inv_dir = stencil[stencil.inverse_index(direction)]  # stencil entry stored at the inverse index of `direction`
+    target = ps.Target.GPU
+
+    domain_size = (4,) * stencil.D  # 4 cells along each of the stencil.D dimensions
+
+    dh: ps.datahandling.SerialDataHandling = ps.create_data_handling(
+        domain_size,
+        periodicity=(True,) * stencil.D,  # periodic along every dimension
+        parallel=False,
+        default_target=target,
+    )
+
+    field = dh.add_array("field", values_per_cell=2)  # two values per cell; only index 1 is addressed below
+
+    if pull:  # pull=True: ghost region of inv_dir is the destination
+        dst_slice = get_ghost_region_slice(inv_dir)
+        src_slice = get_slice_before_ghost_layer(direction)
+    else:  # pull=False: the same two regions with source and destination swapped
+        dst_slice = get_slice_before_ghost_layer(direction)
+        src_slice = get_ghost_region_slice(inv_dir)
+
+    src_slice += (1,)  # append the per-cell value index as the last slice entry
+    dst_slice += (1,)  # must be the same index as in src_slice
+
+    kernel = periodic_pdf_gpu_copy_kernel(field, src_slice, dst_slice)
+
+    dh.cpu_arrays[field.name][src_slice] = 42.0  # write a sentinel into the source region of the CPU array
+    dh.all_to_gpu()  # presumably uploads the CPU arrays so the kernel sees the sentinel - confirm
+
+    dh.run_kernel(kernel)
+
+    dh.all_to_cpu()  # presumably downloads the GPU result back into dh.cpu_arrays - confirm
+    np.testing.assert_equal(dh.cpu_arrays[field.name][dst_slice], 42.0)  # every destination entry must equal the sentinel
+
+
 @pytest.mark.parametrize("stencil", [Stencil.D2Q9, Stencil.D3Q19])
 @pytest.mark.parametrize("streaming_pattern", streaming_patterns)
 def test_direct_copy_and_kernels_equivalence(stencil: Stencil, streaming_pattern: str):
@@ -106,6 +148,9 @@ def test_direct_copy_and_kernels_equivalence(stencil: Stencil, streaming_pattern
     pdfs_a = dh.add_array("pdfs_a", values_per_cell=stencil.Q)
     pdfs_b = dh.add_array("pdfs_b", values_per_cell=stencil.Q)
 
+    dh.fill(pdfs_a.name, 0.0, ghost_layers=True)
+    dh.fill(pdfs_b.name, 0.0, ghost_layers=True)
+
     for q in range(stencil.Q):
         sl = ps.make_slice[:4, :4, q] if stencil.D == 2 else ps.make_slice[:4, :4, :4, q]
         dh.cpu_arrays[pdfs_a.name][sl] = q