diff --git a/apps/benchmarks/CMakeLists.txt b/apps/benchmarks/CMakeLists.txt
index 959f4e369f193fd85bfc97d89c9f5d4608b4bbbd..17ae659d7291558b0392bffe1b6457b82cc60449 100644
--- a/apps/benchmarks/CMakeLists.txt
+++ b/apps/benchmarks/CMakeLists.txt
@@ -17,6 +17,7 @@ add_subdirectory( PoiseuilleChannel )
 add_subdirectory( ProbeVsExtraMessage )
 add_subdirectory( SchaeferTurek )
 add_subdirectory( UniformGrid )
+add_subdirectory( CompactInterpolation )
 
 if ( WALBERLA_BUILD_WITH_PYTHON )
    add_subdirectory( FieldCommunication )
@@ -26,7 +27,6 @@ if ( WALBERLA_BUILD_WITH_PYTHON )
       add_subdirectory( PhaseFieldAllenCahn )
       add_subdirectory( NonUniformGridCPU )
       add_subdirectory( TurbulentChannel )
-      add_subdirectory( CompactInterpolation )
    endif()
 
    if ( WALBERLA_BUILD_WITH_CODEGEN AND WALBERLA_BUILD_WITH_GPU_SUPPORT )
diff --git a/apps/benchmarks/CompactInterpolation/CMakeLists.txt b/apps/benchmarks/CompactInterpolation/CMakeLists.txt
index 101c5e3a474ccdeea5744b867208a73d76e142e9..a3333c5b8d3e3853ed9d9a82d09f17b835126afe 100644
--- a/apps/benchmarks/CompactInterpolation/CMakeLists.txt
+++ b/apps/benchmarks/CompactInterpolation/CMakeLists.txt
@@ -14,4 +14,4 @@ waLBerla_generate_target_from_python(NAME CompactInterpolationGenerated
         CompactInterpolationInfoHeader.h)
 waLBerla_add_executable( NAME CompactInterpolation
                          FILES CompactInterpolation.cpp LdcSetup.h
-                         DEPENDS blockforest boundary core gpu domain_decomposition field geometry lbm_generated python_coupling timeloop vtk CompactInterpolationGenerated )
\ No newline at end of file
+                         DEPENDS blockforest boundary core gpu domain_decomposition field geometry lbm_generated timeloop vtk CompactInterpolationGenerated )
\ No newline at end of file
diff --git a/apps/benchmarks/CompactInterpolation/CompactInterpolation.cpp b/apps/benchmarks/CompactInterpolation/CompactInterpolation.cpp
index bf2d5e602fad990c78c663fa9264b3eff1175ce3..70a34ca4fb822409693d15a2eceff90a1b274701 100644
--- a/apps/benchmarks/CompactInterpolation/CompactInterpolation.cpp
+++ b/apps/benchmarks/CompactInterpolation/CompactInterpolation.cpp
@@ -280,8 +280,8 @@ int main(int argc, char** argv)
       timeLoop.addFuncBeforeTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
       vtk::writeDomainDecomposition(blocks, "domain_decomposition", "vtk_out", "write_call", true, true, 0);
    }
-   LBMMeshRefinement.add2StepRefinementToTimeLoop(timeLoop);
-   //LBMMeshRefinement.addRefinementToTimeLoop(timeLoop);
+   //LBMMeshRefinement.add2StepRefinementToTimeLoop(timeLoop);
+   LBMMeshRefinement.addRefinementToTimeLoop(timeLoop);
 
    //LBMMeshRefinement.add2StepCommunicationToTimeLoop(timeLoop);
 
diff --git a/python/lbmpy_walberla/interpolation_methods.py b/python/lbmpy_walberla/interpolation_methods.py
index d4d4e346d9d6e3438733912a8ff4cddd3ba12a9b..f597342dd34c94a2f818182a4acd319d79113155 100644
--- a/python/lbmpy_walberla/interpolation_methods.py
+++ b/python/lbmpy_walberla/interpolation_methods.py
@@ -1,5 +1,6 @@
 import sympy as sp
 from enum import Enum, auto
+from itertools import product
 
 from pystencils import Assignment, Field
 from lbmpy.relaxationrates import get_shear_relaxation_rate, relaxation_rate_scaling
@@ -59,8 +60,25 @@ class InterpolationConfig:
             NotImplementedError()
 
 
-    def fine_to_coarse_linear_interpolation_assignments(self, src, buffer, dir_indices):
+    def get_coarse_to_fine_assignments(self, src, buffer, dir_indices):  # returns the pack assignments of the configured coarse-to-fine scheme
+        if self.coarse_to_fine_interpolation == Interpolation.Explosion:
+            return self.coarse_to_fine_interpolation_assignments(src, buffer, dir_indices)
+        #elif self.coarse_to_fine_interpolation == Interpolation.Compact:
+        #    return self.fine_to_coarse_compact_interpolation_assignments(src, buffer, dir_indices)
+        else:
+            raise NotImplementedError(f"coarse-to-fine interpolation {self.coarse_to_fine_interpolation} is not implemented")  # must be raised, otherwise None is returned silently
+
+
+    def coarse_to_fine_interpolation_assignments(self, src, buffer, dir_indices):
+        # Copies every selected PDF into 8 consecutive buffer groups (presumably one per fine child cell in 3D).
+        n_dirs = len(dir_indices)
+        return [Assignment(buffer(slot + group * n_dirs), src[d])
+                for group in range(8)
+                for slot, d in enumerate(dir_indices)]
+
 
+
+    def fine_to_coarse_linear_interpolation_assignments(self, src, buffer, dir_indices):
         linear_interpol_offset = [(0,0,0), (1,0,0), (0,0,1), (1,0,1), (0,1,0), (1,1,0), (0,1,1), (1,1,1)]
         assignments = []
 
diff --git a/python/lbmpy_walberla/packing_kernels.py b/python/lbmpy_walberla/packing_kernels.py
index 20feba6717b578183b28870351547828ce437ae2..8d1bd13de8ef8d7fe5093214e08d1b8d38ab93fd 100644
--- a/python/lbmpy_walberla/packing_kernels.py
+++ b/python/lbmpy_walberla/packing_kernels.py
@@ -136,10 +136,11 @@ class PackingKernelsCodegen:
 
     def create_nonuniform_kernel_families(self, kernels_dict=None):
         kernels = dict() if kernels_dict is None else kernels_dict
-        kernels['localCopyRedistribute'] = self.get_local_copy_redistribute_kernel_family()
-        kernels['localPartialCoalescence'] = self.get_local_copy_partial_coalescence_kernel_family()
-        kernels['unpackRedistribute'] = self.get_unpack_redistribute_kernel_family()
-        kernels['packPartialCoalescence'] = self.get_pack_partial_coalescence_kernel_family()
+        kernels['localCoarseToFine'] = self.get_local_coarse_to_fine_kernel_family()
+        kernels['localFineToCoarse'] = self.get_local_fine_to_coarse_kernel_family()
+        kernels['packCoarseToFine'] = self.get_pack_coarse_to_fine_kernel_family()
+        kernels['unpackCoarseToFine'] = self.get_unpack_coarse_to_fine_kernel_family()
+        kernels['packFineToCoarse'] = self.get_pack_fine_to_coarse_kernel_family()
         return kernels
 
     # --------------------------- Pack / Unpack / LocalCopy All --------------------------------------------------------
@@ -269,25 +270,50 @@ class PackingKernelsCodegen:
 
     # --------------------------- Pack / Unpack / LocalCopy Coarse to Fine ---------------------------------------------
 
-    def get_unpack_redistribute_ast(self, comm_dir, timestep):
+    def get_pack_coarse_to_fine_ast(self, comm_dir, timestep):
+        """Create the kernel AST that packs PDFs of the source field into the buffer for direction `comm_dir`."""
+        kernel_config = replace(self.config, ghost_layers=0)
+        assert any(d != 0 for d in comm_dir)
+
+        direction_name = offset_to_direction_string(comm_dir)
+        streaming_dirs = self.get_streaming_dirs(comm_dir)
+        #dir_indices = sorted(self.stencil.index(d) for d in streaming_dirs)
+        # All values_per_cell entries are packed, not only the ones selected by streaming_dirs.
+        dir_indices = list(range(self.values_per_cell))
+        # 8 buffer entries per packed PDF, the layout the coarse-to-fine assignments write to.
+        buffer = self._buffer(len(dir_indices) * 8)
+        src, _ = self._stream_out_accs(timestep)
+
+        assignments = self.interpolation_config.get_coarse_to_fine_assignments(src, buffer, dir_indices)
+        ast = create_kernel(assignments, config=kernel_config)
+        ast.function_name = f'packCoarseToFine_{direction_name}' + timestep_suffix(timestep)
+        return ast
+
+    def get_pack_coarse_to_fine_kernel_family(self):
+        return self._construct_directionwise_kernel_family(self.get_pack_coarse_to_fine_ast)
+
+
+    def get_unpack_coarse_to_fine_ast(self, comm_dir, timestep):
         assert not all(d == 0 for d in comm_dir)
         dir_string = offset_to_direction_string(comm_dir)
         streaming_dirs = self.get_streaming_dirs(inverse_direction(comm_dir))
-        dir_indices = sorted(self.stencil.index(d) for d in streaming_dirs)
+        #dir_indices = sorted(self.stencil.index(d) for d in streaming_dirs)
+        dir_indices = [i for i in range(self.values_per_cell)]
         if len(dir_indices) == 0:
             return None
-        buffer = self._buffer(self.values_per_cell)
+
+        buffer = self._buffer(len(dir_indices) * 8)
         _, dst = self._stream_out_accs(timestep)
 
         offsets = list(product(*((0, 1) for _ in comm_dir)))
         assignments = []
 
-        for o in offsets:
-            for i in range(self.values_per_cell):
-                assignments.append(Assignment(dst[i].get_shifted(*o), buffer(i)))
+        for o, off in enumerate(offsets):
+            for i, dir_idx in enumerate(dir_indices):
+                assignments.append(Assignment(dst[dir_idx].get_shifted(*off), buffer(i + o * len(dir_indices))))
 
 
-        function_name = f'unpackRedistribute_{dir_string}' + timestep_suffix(timestep)
+        function_name = f'unpackCoarseToFine_{dir_string}' + timestep_suffix(timestep)
         iteration_slice = tuple(slice(None, None, 2) for _ in range(self.dim))
         config = CreateKernelConfig(function_name=function_name, iteration_slice=iteration_slice,
                                     data_type=self.data_type, ghost_layers=0, allow_double_writes=True,
@@ -295,11 +321,11 @@ class PackingKernelsCodegen:
 
         return create_kernel(assignments, config=config)
 
-    def get_unpack_redistribute_kernel_family(self):
-        return self._construct_directionwise_kernel_family(self.get_unpack_redistribute_ast,
+    def get_unpack_coarse_to_fine_kernel_family(self):
+        return self._construct_directionwise_kernel_family(self.get_unpack_coarse_to_fine_ast,
                                                            exclude_time_step=Timestep.EVEN)
 
-    def get_local_copy_redistribute_ast(self, comm_dir, timestep):
+    def get_local_coarse_to_fine_ast(self, comm_dir, timestep):
         assert not all(d == 0 for d in comm_dir)
         ctr = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(self.stencil.D)]
 
@@ -339,7 +365,7 @@ class PackingKernelsCodegen:
                 src_access = [int_div(ctr[i], 2) + o for i, o in enumerate(src[d].offsets)]
                 assignments.append(Assignment(field_acc, src_abs.absolute_access(src_access, (d, ))))
 
-        function_name = f'localCopyRedistribute_{dir_string}' + timestep_suffix(timestep)
+        function_name = f'localCoarseToFine_{dir_string}' + timestep_suffix(timestep)
         iteration_slice = tuple(slice(None, None, 2) for _ in range(self.dim))
         config = CreateKernelConfig(function_name=function_name, iteration_slice=iteration_slice,
                                     data_type=self.data_type, ghost_layers=0, allow_double_writes=True,
@@ -347,19 +373,20 @@ class PackingKernelsCodegen:
 
         return create_kernel(assignments, config=config)
 
-    def get_local_copy_redistribute_kernel_family(self):
-        return self._construct_directionwise_kernel_family(self.get_local_copy_redistribute_ast)
+    def get_local_coarse_to_fine_kernel_family(self):
+        return self._construct_directionwise_kernel_family(self.get_local_coarse_to_fine_ast)
 
     # --------------------------- Pack / Unpack / LocalCopy Fine to Coarse ---------------------------------------------
 
-    def get_pack_partial_coalescence_ast(self, comm_dir, timestep):
+    def get_pack_fine_to_coarse_ast(self, comm_dir, timestep):
         assert not all(d == 0 for d in comm_dir)
         dir_string = offset_to_direction_string(comm_dir)
         streaming_dirs = self.get_streaming_dirs(comm_dir)
-        dir_indices = sorted(self.stencil.index(d) for d in streaming_dirs)
+        #dir_indices = sorted(self.stencil.index(d) for d in streaming_dirs)
+        dir_indices = [i for i in range(self.values_per_cell)]
         if len(dir_indices) == 0:
             return None
-        buffer = self._buffer(len(streaming_dirs))
+        buffer = self._buffer(len(dir_indices))
         src, _ = self._stream_out_accs(timestep)
 
         assignments = self.interpolation_config.get_fine_to_coarse_assignments(src, buffer, dir_indices)
@@ -368,15 +395,15 @@ class PackingKernelsCodegen:
         config = replace(self.config, iteration_slice=iteration_slice, ghost_layers=0)
 
         ast = create_kernel(assignments, config=config)
-        ast.function_name = f'packPartialCoalescence_{dir_string}' + timestep_suffix(timestep)
+        ast.function_name = f'packFineToCoarse_{dir_string}' + timestep_suffix(timestep)
         return ast
 
-    def get_pack_partial_coalescence_kernel_family(self):
-        return self._construct_directionwise_kernel_family(self.get_pack_partial_coalescence_ast,
+    def get_pack_fine_to_coarse_kernel_family(self):
+        return self._construct_directionwise_kernel_family(self.get_pack_fine_to_coarse_ast,
                                                            exclude_time_step=Timestep.ODD)
 
     #TODO not working
-    def get_local_copy_partial_coalescence_ast(self, comm_dir, timestep):
+    def get_local_fine_to_coarse_ast(self, comm_dir, timestep):
         assert not all(d == 0 for d in comm_dir)
         ctr = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(self.stencil.D)]
 
@@ -405,11 +432,11 @@ class PackingKernelsCodegen:
         config = replace(self.config, iteration_slice=iteration_slice, ghost_layers=0)
 
         ast = create_kernel(assignments, config=config)
-        ast.function_name = f'localPartialCoalescence_{dir_string}' + timestep_suffix(timestep)
+        ast.function_name = f'localFineToCoarse_{dir_string}' + timestep_suffix(timestep)
         return ast
 
-    def get_local_copy_partial_coalescence_kernel_family(self):
-        return self._construct_directionwise_kernel_family(self.get_local_copy_partial_coalescence_ast)
+    def get_local_fine_to_coarse_kernel_family(self):
+        return self._construct_directionwise_kernel_family(self.get_local_fine_to_coarse_ast)
 
     # ------------------------------------------ Utility ---------------------------------------------------------------
 
diff --git a/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.cpp b/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.cpp
index 0b2db4960c1ff886d5d88b18cc009934670760b3..611850b3c078de6dcb31430ea2414e039fe31463 100644
--- a/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.cpp
+++ b/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.cpp
@@ -75,10 +75,11 @@ namespace {{namespace}} {
    {{ kernels['localCopyDirection'] | generate_definitions }}
 
    {% if nonuniform -%}
-   {{ kernels['localCopyRedistribute']    | generate_definitions }}
-   {{ kernels['localPartialCoalescence']    | generate_definitions }}
-   {{ kernels['unpackRedistribute']    | generate_definitions }}
-   {{ kernels['packPartialCoalescence']    | generate_definitions }}
+   {{ kernels['localCoarseToFine']    | generate_definitions }}
+   {{ kernels['localFineToCoarse']    | generate_definitions }}
+   {{ kernels['packCoarseToFine']     | generate_definitions }}
+   {{ kernels['unpackCoarseToFine']   | generate_definitions }}
+   {{ kernels['packFineToCoarse']     | generate_definitions }}
    {%- endif %}
 
    /*************************************************************************************
@@ -222,48 +223,60 @@ namespace {{namespace}} {
 
 
    {% if nonuniform -%}
-   void {{class_name}}::PackKernels::localCopyRedistribute(
+   void {{class_name}}::PackKernels::localCoarseToFine(
       {{- [  "PdfField_T * " + src_field.name, "CellInterval & srcInterval",
-             "PdfField_T * " + dst_field.name, "CellInterval & dstInterval", kernels['localCopyRedistribute'].kernel_selection_parameters,
+             "PdfField_T * " + dst_field.name, "CellInterval & dstInterval", kernels['localCoarseToFine'].kernel_selection_parameters,
              ["gpuStream_t stream"] if is_gpu else []]
           | type_identifier_list -}}
    ) const
    {
-      {{kernels['localCopyRedistribute'] | generate_call(cell_interval={src_field.name : 'srcInterval', dst_field.name : 'dstInterval'}, stream='stream') | indent(6) }}
+      {{kernels['localCoarseToFine'] | generate_call(cell_interval={src_field.name : 'srcInterval', dst_field.name : 'dstInterval'}, stream='stream') | indent(6) }}
    }
 
-   void {{class_name}}::PackKernels::localPartialCoalescence(
+   void {{class_name}}::PackKernels::localFineToCoarse(
       {{- [  "PdfField_T * " + src_field.name, "ScalarField_T * " + density.name, "VelocityField_T * " + velocity.name,
              "StrainRateTensorField_T * " + strain_rate_tensor.name, "real_t omega", "CellInterval & srcInterval",
-             "PdfField_T * " + dst_field.name, "CellInterval & dstInterval", kernels['localPartialCoalescence'].kernel_selection_parameters,
+             "PdfField_T * " + dst_field.name, "CellInterval & dstInterval", kernels['localFineToCoarse'].kernel_selection_parameters,
              ["gpuStream_t stream"] if is_gpu else []]
           | type_identifier_list -}}
    ) const
    {
-      {{kernels['localPartialCoalescence'] | generate_call(cell_interval={src_field.name : 'srcInterval', dst_field.name : 'dstInterval', density.name : 'srcInterval', velocity.name : 'srcInterval', strain_rate_tensor.name : 'srcInterval' }, stream='stream') | indent(6) }}
+      {{kernels['localFineToCoarse'] | generate_call(cell_interval={src_field.name : 'srcInterval', dst_field.name : 'dstInterval', density.name : 'srcInterval', velocity.name : 'srcInterval', strain_rate_tensor.name : 'srcInterval' }, stream='stream') | indent(6) }}
    }
 
-   void {{class_name}}::PackKernels::unpackRedistribute(
+   void {{class_name}}::PackKernels::packCoarseToFine(
+      {{- [ "PdfField_T * " + src_field.name, "ScalarField_T * " + density.name, "VelocityField_T * " + velocity.name,
+             "StrainRateTensorField_T * " + strain_rate_tensor.name, "real_t omega", "CellInterval & ci",
+             "unsigned char * outBuffer", kernels['packCoarseToFine'].kernel_selection_parameters,
+             ["gpuStream_t stream"] if is_gpu else []]
+          | type_identifier_list -}}
+   ) const
+   { {# the default for 'stream' is given on the declaration only; repeating it in the definition is ill-formed C++ #}
+      {{dtype}} * buffer = reinterpret_cast<{{dtype}}*>(outBuffer); // every field of the kernel is traversed over the same interval 'ci'
+      {{kernels['packCoarseToFine'] | generate_call(cell_interval='ci', stream='stream') | indent(6) }}
+   }
+
+   void {{class_name}}::PackKernels::unpackCoarseToFine(
       {{- [ "PdfField_T * " + dst_field.name, "CellInterval & ci",
-             "unsigned char * inBuffer", kernels['unpackDirection'].kernel_selection_parameters,
+             "unsigned char * inBuffer", kernels['unpackCoarseToFine'].kernel_selection_parameters,
              ["gpuStream_t stream"] if is_gpu else []]
           | type_identifier_list -}}
    ) const
    {
       {{dtype}} * buffer = reinterpret_cast<{{dtype}}*>(inBuffer);
-      {{kernels['unpackRedistribute'] | generate_call(cell_interval='ci', stream='stream') | indent(6) }}
+      {{kernels['unpackCoarseToFine'] | generate_call(cell_interval='ci', stream='stream') | indent(6) }}
    }
 
-   void {{class_name}}::PackKernels::packPartialCoalescence(
+   void {{class_name}}::PackKernels::packFineToCoarse(
       {{- [ "PdfField_T * " + src_field.name, "ScalarField_T * " + density.name, "VelocityField_T * " + velocity.name,
              "StrainRateTensorField_T * " + strain_rate_tensor.name, "real_t omega", "CellInterval & ci",
-             "unsigned char * outBuffer", kernels['packPartialCoalescence'].kernel_selection_parameters,
+             "unsigned char * outBuffer", kernels['packFineToCoarse'].kernel_selection_parameters,
              ["gpuStream_t stream"] if is_gpu else []]
           | type_identifier_list -}}
    ) const
    {
       {{dtype}} * buffer = reinterpret_cast<{{dtype}}*>(outBuffer);
-      {{kernels['packPartialCoalescence'] | generate_call(cell_interval='ci', stream='stream') | indent(6) }}
+      {{kernels['packFineToCoarse'] | generate_call(cell_interval='ci', stream='stream') | indent(6) }}
    }
    {%- endif %}
 }  // namespace {{namespace}}
diff --git a/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.h b/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.h
index ec17cf6c248b12edefea82b1fd9d36897405c89c..a66e87c2df63b7b08f54151351200aae270a41e8 100644
--- a/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.h
+++ b/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.h
@@ -217,9 +217,9 @@ class {{class_name}}
       /**
        * Local uniform redistribute.
        * */
-      void localCopyRedistribute(
+      void localCoarseToFine(
          {{- [  "PdfField_T * " + src_field.name, "CellInterval & srcInterval",
-                "PdfField_T * " + dst_field.name, "CellInterval & dstInterval", kernels['localCopyRedistribute'].kernel_selection_parameters,
+                "PdfField_T * " + dst_field.name, "CellInterval & dstInterval", kernels['localCoarseToFine'].kernel_selection_parameters,
                 ["gpuStream_t stream = nullptr"] if is_gpu else []]
              | type_identifier_list -}}
       ) const;
@@ -227,20 +227,33 @@ class {{class_name}}
       /**
        * Local partial coalescence.
        * */
-      void localPartialCoalescence(
+      void localFineToCoarse(
          {{- [  "PdfField_T * " + src_field.name, "ScalarField_T * " + density.name, "VelocityField_T * " + velocity.name,
                 "StrainRateTensorField_T * " + strain_rate_tensor.name, "real_t omega", "CellInterval & srcInterval",
-                "PdfField_T * " + dst_field.name, "CellInterval & dstInterval", kernels['localPartialCoalescence'].kernel_selection_parameters,
+                "PdfField_T * " + dst_field.name, "CellInterval & dstInterval", kernels['localFineToCoarse'].kernel_selection_parameters,
                 ["gpuStream_t stream = nullptr"] if is_gpu else []]
              | type_identifier_list -}}
       ) const;
 
+
+      /**
+       * Packs populations of a coarse block into the send buffer for a fine neighbor block.
+       * */
+      void packCoarseToFine(
+         {{- [ "PdfField_T * " + src_field.name, "ScalarField_T * " + density.name, "VelocityField_T * " + velocity.name,
+                "StrainRateTensorField_T * " + strain_rate_tensor.name, "real_t omega", "CellInterval & ci",
+                "unsigned char * outBuffer", kernels['packCoarseToFine'].kernel_selection_parameters,
+                ["gpuStream_t stream = nullptr"] if is_gpu else []]
+             | type_identifier_list -}}
+      ) const;
+
+
       /**
        * Unpacks and uniformly redistributes populations coming from a coarse block onto the fine grid.
        * */
-      void unpackRedistribute(
+      void unpackCoarseToFine(
          {{- [ "PdfField_T * " + dst_field.name, "CellInterval & ci",
-                "unsigned char * inBuffer", kernels['unpackRedistribute'].kernel_selection_parameters,
+                "unsigned char * inBuffer", kernels['unpackCoarseToFine'].kernel_selection_parameters,
                 ["gpuStream_t stream = nullptr"] if is_gpu else []]
              | type_identifier_list -}}
       ) const;
@@ -248,10 +261,10 @@ class {{class_name}}
       /**
        * Partially coalesces and packs populations streaming from a fine block into a coarse block
        * */
-      void packPartialCoalescence(
+      void packFineToCoarse(
          {{- [ "PdfField_T * " + src_field.name, "ScalarField_T * " + density.name, "VelocityField_T * " + velocity.name,
                 "StrainRateTensorField_T * " + strain_rate_tensor.name, "real_t omega", "CellInterval & ci",
-                "unsigned char * outBuffer", kernels['packPartialCoalescence'].kernel_selection_parameters,
+                "unsigned char * outBuffer", kernels['packFineToCoarse'].kernel_selection_parameters,
                 ["gpuStream_t stream = nullptr"] if is_gpu else []]
              | type_identifier_list -}}
       ) const;
@@ -275,7 +288,7 @@ class {{class_name}}
        * @return    The required size of the buffer, in bytes
        * */
       uint_t partialCoalescenceSize(CellInterval & ci, stencil::Direction dir) const {
-         return size(ci, dir) >> {{dimension}};
+         return size(ci) >> {{dimension}};
       }
 
       {%- endif %}
diff --git a/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h b/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h
index fc44cdc44e2ddb24b8f2dd9dd4ca4a3a275be05b..ed93f59e081f58ed800447080d3fd0265b0b0484 100644
--- a/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h
+++ b/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h
@@ -57,13 +57,16 @@ class NonuniformPackingKernelsWrapper
    void localCopyRedistribute(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField,
                               CellInterval dstInterval, Direction dir) const = 0;
 
-   void localPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskField* maskField, CellInterval srcInterval,
+   void localFineToCoarse(PdfField_T* srcField, CellInterval srcInterval,
                                 PdfField_T* dstField, CellInterval dstInterval, Direction dir) const = 0;
 
-   void unpackRedistribute(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer,
+   void packCoarseToFine(PdfField_T* srcField, CellInterval& ci,
+                         unsigned char* inBuffer, Direction dir) const = 0;
+
+   void unpackCoarseToFine(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer,
                            stencil::Direction dir) const = 0;
 
-   void packPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskField* maskField, CellInterval& ci,
+   void packFineToCoarse(PdfField_T* srcField, CellInterval& ci,
                                unsigned char* outBuffer, Direction dir) const = 0;
 
    uint_t size(CellInterval& ci, Direction dir) const                   = 0;
@@ -114,28 +117,33 @@ class NonuniformPackingKernelsWrapper< PdfField_T, false >
       kernels_.localCopyDirection(srcField, srcInterval, dstField, dstInterval, dir);
    }
 
-   void localCopyRedistribute(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField,
+   void localCoarseToFine(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField,
                               CellInterval dstInterval, Direction dir) const
    {
-      kernels_.localCopyRedistribute(srcField, srcInterval, dstField, dstInterval, dir);
+      kernels_.localCoarseToFine(srcField, srcInterval, dstField, dstInterval, dir);
    }
 
-   void localPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskField* maskField, CellInterval srcInterval,
+   void localFineToCoarse(PdfField_T* srcField, CellInterval srcInterval,
                                 PdfField_T* dstField, CellInterval dstInterval, Direction dir) const
    {
-      kernels_.localPartialCoalescence(srcField, maskField, srcInterval, dstField, dstInterval, dir);
+      kernels_.localFineToCoarse(srcField, nullptr, nullptr, nullptr, 0.0, srcInterval, dstField, dstInterval, dir); // density, velocity, strain-rate fields and omega are not supplied, as in packFineToCoarse
+   }
+
+   void packCoarseToFine(PdfField_T* srcField, CellInterval& ci, unsigned char* outBuffer, Direction dir) const
+   { // outBuffer is written by the pack kernel; density, velocity, strain-rate fields and omega are not supplied
+      kernels_.packCoarseToFine(srcField, nullptr, nullptr, nullptr, 0.0, ci, outBuffer, dir);
    }
 
-   void unpackRedistribute(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer,
+   void unpackCoarseToFine(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer,
                            stencil::Direction dir) const
    {
-      kernels_.unpackRedistribute(dstField, ci, inBuffer, dir);
+      kernels_.unpackCoarseToFine(dstField, ci, inBuffer, dir);
    }
 
-   void packPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskField* maskField, CellInterval& ci,
+   void packFineToCoarse(PdfField_T* srcField, CellInterval& ci,
                                unsigned char* outBuffer, Direction dir) const
    {
-      kernels_.packPartialCoalescence(srcField, nullptr, nullptr, nullptr, 0.0, ci, outBuffer, dir);
+      kernels_.packFineToCoarse(srcField, nullptr, nullptr, nullptr, 0.0, ci, outBuffer, dir);
    }
 
    uint_t size(CellInterval& ci, Direction dir) const { return kernels_.size(ci, dir); }
@@ -210,46 +218,44 @@ class NonuniformPackingKernelsWrapper< PdfField_T, true >
       kernels_.localCopyRedistribute(srcField, srcInterval, dstField, dstInterval, dir, timestep);
    }
 
-   void localPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskField* maskField, CellInterval srcInterval,
+   void localFineToCoarse(PdfField_T* srcField, CellInterval srcInterval,
                                 PdfField_T* dstField, CellInterval dstInterval, Direction dir) const
    {
       uint8_t timestep = dstField->getTimestep();
       WALBERLA_ASSERT((srcField->getTimestep() & 1) ^ 1, "When the fine to coarse step is executed, the fine Field must "
                                                          "be on an even timestep, while the source field could either be "
                                                          "on an even or an odd state.")
-      kernels_.localPartialCoalescence(srcField, maskField, srcInterval, dstField, dstInterval, dir, timestep);
+      kernels_.localFineToCoarse(srcField, nullptr, nullptr, nullptr, 0.0, srcInterval, dstField, dstInterval, dir, timestep); // generated kernel was renamed; density, velocity, strain-rate fields and omega are not supplied
    }
 
-   void unpackRedistribute(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer,
+   void packCoarseToFine(PdfField_T* srcField, CellInterval& ci,
+                         unsigned char* outBuffer, Direction dir) const
+   {
+      // The coarse-to-fine pack kernels are generated without excluding a timestep, so the coarse
+      // source field may be on an even or an odd timestep and no parity assertion is placed here.
+      // The timestep of the source field only selects the kernel variant that is called.
+      uint8_t timestep = srcField->getTimestep();
+      kernels_.packCoarseToFine(srcField, nullptr, nullptr, nullptr, 0.0, ci, outBuffer, dir, timestep);
+   }
+
+   void unpackCoarseToFine(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer,
                            stencil::Direction dir) const
    {
       uint8_t timestep = dstField->getTimestep();
       WALBERLA_ASSERT(!((dstField->getTimestep() & 1) ^ 1), "When the course to fine step is executed, the fine Field must "
                                                             "be on an odd timestep, while the source field could either be "
                                                             "on an even or an odd state.")
-      kernels_.unpackRedistribute(dstField, ci, inBuffer, dir, timestep);
+      kernels_.unpackCoarseToFine(dstField, ci, inBuffer, dir, timestep);
    }
 
-   void packPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskField* maskField, CellInterval& ci,
+   void packFineToCoarse(PdfField_T* srcField, CellInterval& ci,
                                unsigned char* outBuffer, Direction dir) const
    {
       uint8_t timestep = srcField->getTimestep();
       WALBERLA_ASSERT((srcField->getTimestep() & 1) ^ 1, "When the fine to coarse step is executed, the fine Field must "
                                                          "be on an even timestep, while the source field could either be "
                                                          "on an even or an odd state.")
-      kernels_.packPartialCoalescence(srcField, maskField, ci, outBuffer, dir, timestep);
-   }
-
-   void zeroCoalescenceRegion(PdfField_T* dstField, CellInterval& ci, Direction dir) const
-   {
-      uint8_t timestep = dstField->getTimestep();
-      kernels_.zeroCoalescenceRegion(dstField, ci, dir, timestep);
-   }
-
-   void unpackCoalescence(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer, Direction dir) const
-   {
-      uint8_t timestep = dstField->getTimestep();
-      kernels_.unpackCoalescence(dstField, ci, inBuffer, dir, timestep);
+      kernels_.packFineToCoarse(srcField, nullptr, nullptr, nullptr, 0.0, ci, outBuffer, dir, timestep); // generated kernel was renamed; density, velocity, strain-rate fields and omega are not supplied
    }
 
    uint_t size(CellInterval& ci, Direction dir) const { return kernels_.size(ci, dir); }
diff --git a/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.impl.h b/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.impl.h
index f83fb47a14704be9d8f14bd6c8c416ef0b9c6b9a..2d785cdc127e6ef432f33261cf63c65cfe22ebc9 100644
--- a/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.impl.h
+++ b/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.impl.h
@@ -120,7 +120,7 @@ void NonuniformGeneratedPdfPackInfo< PdfField_T >::packDataCoarseToFineImpl(
    {
       CellInterval ci          = t.second;
       unsigned char* bufferPtr = buffer.forward(kernels_.size(ci));
-      kernels_.packAll(field, ci, bufferPtr);
+      kernels_.packCoarseToFine(field, ci, bufferPtr, dir);
    }
 }
 
@@ -139,7 +139,7 @@ void NonuniformGeneratedPdfPackInfo< PdfField_T >::unpackDataCoarseToFine(
       CellInterval ci          = t.second;
       uint_t size              = kernels_.redistributeSize(ci);
       unsigned char* bufferPtr = buffer.skip(size);
-      kernels_.unpackRedistribute(field, ci, bufferPtr, d);
+      kernels_.unpackCoarseToFine(field, ci, bufferPtr, d);
    }
 }
 
@@ -174,10 +174,10 @@ void NonuniformGeneratedPdfPackInfo< PdfField_T >::communicateLocalCoarseToFine(
 
       // TODO: This is a dirty workaround. Code-generate direct redistribution!
       std::vector< unsigned char > buffer(kernels_.size(srcInterval));
-      kernels_.packAll(srcField, srcInterval, &buffer[0]);
-      kernels_.unpackRedistribute(dstField, dstInterval, &buffer[0], unpackDir);
+      kernels_.packCoarseToFine(srcField, srcInterval, &buffer[0], dir);
+      kernels_.unpackCoarseToFine(dstField, dstInterval, &buffer[0], unpackDir);
 
-      // kernels_.localCopyRedistribute(srcField, srcInterval, dstField, dstInterval, unpackDir);
+      // kernels_.localCoarseToFine(srcField, srcInterval, dstField, dstInterval, unpackDir);
    }
 }
 
@@ -195,7 +195,7 @@ void walberla::lbm_generated::NonuniformGeneratedPdfPackInfo< PdfField_T >::unpa
    CellInterval ci = getCoarseBlockCoalescenceInterval(coarseReceiver, fineSender, dir, dstField);
    uint_t size = kernels_.size(ci, dir);
    unsigned char* bufferPtr = buffer.skip(size);
-   kernels_.unpackDirection(dstField, ci, bufferPtr, dir);
+   kernels_.unpackAll(dstField, ci, bufferPtr);
 }
 
 template< typename PdfField_T>
@@ -209,8 +209,6 @@ void walberla::lbm_generated::NonuniformGeneratedPdfPackInfo< PdfField_T >::comm
 
    Block * varFineSender = const_cast< Block * >(fineSender);
    auto srcField   = varFineSender->getData< PdfField_T >(pdfFieldID_);
-   auto srcCommData   = varFineSender->getData< CommData_T >(commDataID_);
-   PartialCoalescenceMaskField * maskField = &(srcCommData->getMaskField());
    auto dstField = coarseReceiver->getData<PdfField_T>(pdfFieldID_);
 
    CellInterval srcInterval;
@@ -226,9 +224,10 @@ void walberla::lbm_generated::NonuniformGeneratedPdfPackInfo< PdfField_T >::comm
 
    // TODO: This is a dirty workaround. Code-generate direct redistribution!
    std::vector< unsigned char > buffer(kernels_.partialCoalescenceSize(srcInterval, dir));
-   kernels_.packPartialCoalescence(srcField, maskField, srcInterval, &buffer[0], dir);
-   kernels_.unpackDirection(dstField, dstInterval, &buffer[0], invDir);
-   // kernels_.localPartialCoalescence(srcField, maskField, srcInterval, dstField, dstInterval, dir);
+   kernels_.packFineToCoarse(srcField, srcInterval, &buffer[0], dir);
+   kernels_.unpackAll(dstField, dstInterval, &buffer[0]);
+   // Replaced by unpackAll above: kernels_.unpackDirection(dstField, dstInterval, &buffer[0], invDir);
+   // kernels_.localFineToCoarse(srcField, srcInterval, dstField, dstInterval, dir);
 }
 
 template< typename PdfField_T>
@@ -238,14 +237,11 @@ void walberla::lbm_generated::NonuniformGeneratedPdfPackInfo< PdfField_T >::pack
 {
    Block* varBlock = const_cast< Block* >(fineSender);
    auto srcField   = varBlock->getData< PdfField_T >(pdfFieldID_);
-   auto commData  = varBlock->getData< CommData_T >(commDataID_);
-   PartialCoalescenceMaskField * maskField = &(commData->getMaskField());
-
    CellInterval ci;
    srcField->getSliceBeforeGhostLayer(dir, ci, 2);
    uint_t size = kernels_.partialCoalescenceSize(ci, dir);
    unsigned char* bufferPtr = buffer.forward(size);
-   kernels_.packPartialCoalescence(srcField, maskField, ci, bufferPtr, dir);
+   kernels_.packFineToCoarse(srcField, ci, bufferPtr, dir);
 }
 
 /***********************************************************************************************************************