Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision
  • Sparse
  • WallLaw
  • improved_comm
  • master
  • mr_refactor_wfb
  • suffa/cumulantfourth_order_correction_with_psm
  • release/0.2.1
  • release/0.2.10
  • release/0.2.11
  • release/0.2.12
  • release/0.2.13
  • release/0.2.14
  • release/0.2.15
  • release/0.2.2
  • release/0.2.3
  • release/0.2.4
  • release/0.2.5
  • release/0.2.6
  • release/0.2.7
  • release/0.2.8
  • release/0.2.9
  • release/0.3.0
  • release/0.3.1
  • release/0.3.2
  • release/0.3.3
  • release/0.3.4
  • release/0.4.0
  • release/0.4.1
  • release/0.4.2
  • release/0.4.3
  • release/0.4.4
  • release/1.0
  • release/1.0.1
  • release/1.1
  • release/1.1.1
  • release/1.2
  • release/1.3
  • release/1.3.1
  • release/1.3.2
  • release/1.3.3
  • release/1.3.4
  • release/1.3.5
  • release/1.3.6
  • release/1.3.7
44 results

Target

Select target project
  • ravi.k.ayyala/lbmpy
  • brendan-waters/lbmpy
  • anirudh.jonnalagadda/lbmpy
  • jbadwaik/lbmpy
  • alexander.reinauer/lbmpy
  • itischler/lbmpy
  • he66coqe/lbmpy
  • ev81oxyl/lbmpy
  • Bindgen/lbmpy
  • da15siwa/lbmpy
  • holzer/lbmpy
  • RudolfWeeber/lbmpy
  • pycodegen/lbmpy
13 results
Select Git revision
  • GetterSetterAPI
  • HRR
  • HydroPressure
  • InplaceConfig
  • Outflow
  • PhaseField
  • Sparse
  • UBBVelocity
  • UpdateAPISparse
  • WallLaw
  • WetNodeBoundaries
  • csebug
  • feature/sparse
  • feature/try
  • improved_comm
  • install_requires
  • master
  • phaseField
  • relaxationrates
  • test_martin
  • release/0.2.1
  • release/0.2.10
  • release/0.2.11
  • release/0.2.12
  • release/0.2.13
  • release/0.2.14
  • release/0.2.15
  • release/0.2.2
  • release/0.2.3
  • release/0.2.4
  • release/0.2.5
  • release/0.2.6
  • release/0.2.7
  • release/0.2.8
  • release/0.2.9
  • release/0.3.0
  • release/0.3.1
  • release/0.3.2
  • release/0.3.3
  • release/0.3.4
  • release/0.4.0
  • release/0.4.1
  • release/0.4.2
  • release/0.4.3
  • release/0.4.4
  • release/1.0
  • release/1.0.1
  • release/1.1
  • release/1.1.1
  • release/1.2
  • release/1.3
  • release/1.3.1
  • release/1.3.2
  • release/1.3.3
  • release/1.3.4
  • release/1.3.5
  • release/1.3.6
57 results
Show changes
Showing
with 1998 additions and 16 deletions
import numpy as np
import sympy as sp
import lbmpy.plot as plt
import pystencils as ps
from pystencils import make_slice, show_code
from pystencils.jupyter import *
from lbmpy.advanced_streaming import *
from lbmpy.boundaries import *
from lbmpy.creationfunctions import *
from lbmpy.enums import *
from lbmpy.geometry import *
from lbmpy.lbstep import LatticeBoltzmannStep
from lbmpy.macroscopic_value_kernels import pdf_initialization_assignments
from lbmpy.parameterization import ScalingWidget
import lbmpy.plot as plt
from lbmpy.postprocessing import *
from lbmpy.scenarios import *
from pystencils import make_slice, show_code
from pystencils.jupyter import *
from pystencils.sympy_gmpy_bug_workaround import *
from lbmpy.stencils import LBStencil
import sympy as sp
from lbmpy.innerloopsplit import create_lbm_split_groups
from lbmpy.methods.momentbased.momentbasedmethod import MomentBasedLbMethod
from lbmpy.methods.momentbased.centralmomentbasedmethod import CentralMomentBasedLbMethod
from lbmpy.methods.cumulantbased import CumulantBasedLbMethod
from lbmpy.methods.cumulantbased.cumulant_simplifications import (
insert_log_products, expand_post_collision_central_moments)
from lbmpy.methods.momentbased.momentbasedsimplifications import (
factor_density_after_factoring_relaxation_times, factor_relaxation_rates,
replace_common_quadratic_and_constant_term, replace_density_and_velocity, replace_second_order_velocity_products,
insert_half_force, insert_conserved_quantity_products)
from pystencils.simp import (
SimplificationStrategy, add_subexpressions_for_divisions, apply_to_all_assignments,
subexpression_substitution_in_main_assignments, insert_aliases, insert_constants,
add_subexpressions_for_constants)
# add_subexpressions_for_constants)
def create_simplification_strategy(lb_method, split_inner_loop=False):
    """Selects a simplification strategy matching the collision space of the given LB method.

    Args:
        lb_method: the lattice Boltzmann method the strategy is built for
        split_inner_loop: if True, loop-splitting groups are added to the strategy

    Returns:
        a pystencils SimplificationStrategy; an empty strategy for unknown method types
    """
    if isinstance(lb_method, MomentBasedLbMethod):
        if lb_method.moment_space_collision:
            return _moment_space_simplification(split_inner_loop)
        # Population-space collision: SRT/TRT (at most two distinct relaxation
        # rates) get the full analytic pipeline, general MRT a lighter one.
        distinct_rate_count = len(set(lb_method.relaxation_rates))
        if distinct_rate_count <= 2:
            return _srt_trt_population_space_simplification(split_inner_loop)
        return _mrt_population_space_simplification(split_inner_loop)
    if isinstance(lb_method, CentralMomentBasedLbMethod):
        return _moment_space_simplification(split_inner_loop)
    if isinstance(lb_method, CumulantBasedLbMethod):
        return _cumulant_space_simplification(split_inner_loop)
    # Unknown method type: no simplifications are applied.
    return SimplificationStrategy()
# --------------- Internal ----------------------------------------------------------------------------
def _srt_trt_population_space_simplification(split_inner_loop):
    """Full analytic simplification pipeline for SRT/TRT methods colliding in population space."""
    expand_all = apply_to_all_assignments(sp.expand)
    passes = [
        expand_all,
        replace_second_order_velocity_products,
        expand_all,
        factor_relaxation_rates,
        replace_density_and_velocity,
        replace_common_quadratic_and_constant_term,
        factor_density_after_factoring_relaxation_times,
        subexpression_substitution_in_main_assignments,
    ]
    if split_inner_loop:
        passes.append(create_lbm_split_groups)
    passes += [
        add_subexpressions_for_divisions,
        insert_constants,
        insert_aliases,
        lambda ac: ac.new_without_unused_subexpressions(),
    ]
    strategy = SimplificationStrategy()
    for simplification_pass in passes:
        strategy.add(simplification_pass)
    return strategy
def _mrt_population_space_simplification(split_inner_loop):
    """Lightweight simplification pipeline for general MRT methods colliding in population space."""
    strategy = SimplificationStrategy()
    for simplification_pass in (subexpression_substitution_in_main_assignments,
                                add_subexpressions_for_divisions):
        strategy.add(simplification_pass)
    if split_inner_loop:
        strategy.add(create_lbm_split_groups)
    strategy.add(lambda ac: ac.new_without_unused_subexpressions())
    return strategy
def _moment_space_simplification(split_inner_loop):
    """Simplification pipeline for methods colliding in (central) moment space."""
    strategy = SimplificationStrategy()
    for simplification_pass in (insert_constants,
                                insert_half_force,
                                insert_aliases,
                                add_subexpressions_for_divisions,
                                add_subexpressions_for_constants):
        strategy.add(simplification_pass)
    if split_inner_loop:
        strategy.add(create_lbm_split_groups)
    strategy.add(lambda ac: ac.new_without_unused_subexpressions())
    return strategy
def _cumulant_space_simplification(split_inner_loop):
    """Simplification pipeline for cumulant-based methods.

    Constant/alias insertion is applied again after expanding the post-collision
    central moments, since the expansion exposes new candidates.
    """
    strategy = SimplificationStrategy()
    for simplification_pass in (insert_constants,
                                insert_aliases,
                                insert_log_products,
                                insert_conserved_quantity_products,
                                insert_half_force,
                                expand_post_collision_central_moments,
                                insert_aliases,
                                insert_constants,
                                insert_aliases,
                                add_subexpressions_for_divisions,
                                add_subexpressions_for_constants):
        strategy.add(simplification_pass)
    if split_inner_loop:
        strategy.add(create_lbm_split_groups)
    strategy.add(lambda ac: ac.new_without_unused_subexpressions())
    return strategy
File moved
......@@ -3,7 +3,13 @@ from typing import Tuple
import numpy as np
import sympy as sp
from lbmpy.boundaries.boundaryhandling import LbmWeightInfo
from .._compat import IS_PYSTENCILS_2
if IS_PYSTENCILS_2:
from lbmpy.lookup_tables import LbmWeightInfo
else:
from lbmpy.custom_code_nodes import LbmWeightInfo
from pystencils import Assignment, Field, TypedSymbol
from pystencils.boundaries.boundaryhandling import BoundaryOffsetInfo
from pystencils.boundaries.createindexlist import (
......@@ -123,7 +129,10 @@ class SparseLbMapper:
assert direction_idx == 0
continue
for own_cell_idx, cell in enumerate(self.fluid_coordinates):
inv_neighbor_cell = np.array([cell_i - dir_i for cell_i, dir_i in zip(cell, direction)])
inv_neighbor_cell = np.array(
[np.int32(cell_i) - dir_i for cell_i, dir_i in zip(cell, direction)],
dtype=np.uint32
)
if flag_arr[tuple(inv_neighbor_cell)] & fluid_boundary_mask:
neighbor_cell_idx = self.cell_idx(tuple(inv_neighbor_cell))
result.append(pdf_index(neighbor_cell_idx, direction_idx))
......@@ -232,7 +241,17 @@ class SparseLbBoundaryMapper:
return result
def assignments(self):
return [BoundaryOffsetInfo(self.method.stencil),
if IS_PYSTENCILS_2:
return (
BoundaryOffsetInfo(self.method.stencil).get_array_declarations()
+ LbmWeightInfo(self.method).get_array_declarations()
+ [Assignment(self.DIR_SYMBOL, self.index_field(self.DIR_SYMBOL.name))]
+ self.boundary_eqs
)
else:
return [
BoundaryOffsetInfo(self.method.stencil),
LbmWeightInfo(self.method),
Assignment(self.DIR_SYMBOL, self.index_field(self.DIR_SYMBOL.name)),
*self.boundary_eqs]
*self.boundary_eqs
]
......@@ -74,7 +74,7 @@ def create_macroscopic_value_setter_sparse(method, pdfs, density, velocity) -> A
velocity: similar to density parameter
"""
cqc = method.conserved_quantity_computation
inp_eqs = cqc.equilibrium_input_equations_from_init_values(density, velocity)
inp_eqs = cqc.equilibrium_input_equations_from_init_values(density, velocity, force_substitution=False)
result = method.get_equilibrium(conserved_quantity_equations=inp_eqs)
substitutions = {a: b for a, b in zip(method.post_collision_pdf_symbols, pdfs.center_vector)}
return result.new_with_substitutions(substitutions).new_without_subexpressions()
......
import pystencils as ps
from pystencils.stencil import have_same_entries
from lbmpy.enums import Stencil
import sympy as sp
class LBStencil:
    r"""
    Class representing a lattice Boltzmann stencil in DxQy notation, where d is the dimension
    (length of the velocity tuples) and y is number of discrete velocities. For every dimension many different version
    of a certain stencil is available. The reason for that is to ensure comparability with the literature.
    Internally the stencil is represented as a tuple of tuples, where the ordering of the tuples plays no role.

    Args:
        stencil: Can be tuple of tuples which represents a DxQy stencil, a string like 'D2Q9' or an enum of
                 lbmpy.enums.Stencil
        ordering: the LBM literature does not use a common order of the discrete velocities, therefore here
                  different common orderings are available. All orderings lead to the same method, it just has
                  to be used consistently. Here more orderings are available to compare intermediate results with
                  the literature.
    """

    def __init__(self, stencil, ordering='walberla'):
        if isinstance(stencil, tuple):
            # An explicit tuple of tuples is taken verbatim, so no named ordering applies.
            ordering = None
            self._stencil_entries = stencil
        elif isinstance(stencil, str):
            self._stencil_entries = _predefined_stencils(stencil, ordering)
        elif isinstance(stencil, Stencil):
            self._stencil_entries = _predefined_stencils(stencil.name, ordering)
        else:
            raise ValueError("The LBStencil can only be created with either a tuple of tuples which defines the "
                             "stencil, a string or an Enum of type lbmpy.enums.Stencil")
        valid_stencil = ps.stencil.is_valid(self._stencil_entries)
        if valid_stencil is False:
            raise ValueError("The stencil you have created is not valid. "
                             "It probably contains elements with different lengths")
        if len(set(self._stencil_entries)) < len(self._stencil_entries):
            raise ValueError("The stencil you have created is not valid. "
                             "It contains duplicated elements")
        self._ordering = ordering
        self._dim = len(self._stencil_entries[0])
        self._q = len(self._stencil_entries)

    @property
    def D(self):
        """Spatial dimension of the stencil (length of each velocity tuple)."""
        return self._dim

    @property
    def Q(self):
        """Number of discrete velocities."""
        return self._q

    @property
    def ordering(self):
        """Name of the literature ordering used, or None for user-provided tuples."""
        return self._ordering

    @property
    def name(self):
        """Stencil name in DxQy notation, e.g. 'D2Q9'."""
        return f"D{self.D}Q{self.Q}"

    @property
    def stencil_entries(self):
        """The discrete velocities as a tuple of tuples."""
        return self._stencil_entries

    @property
    def inverse_stencil_entries(self):
        """Entry-wise inverted (mirrored) discrete velocities, in the same order."""
        return tuple([ps.stencil.inverse_direction(d) for d in self._stencil_entries])

    def plot(self, slice=False, **kwargs):
        """Plots the stencil; kwargs are forwarded to pystencils.stencil.plot."""
        ps.stencil.plot(stencil=self._stencil_entries, slice=slice, **kwargs)

    def index(self, direction):
        """Returns the position of the given direction tuple within the stencil."""
        assert len(direction) == self.D, "direction must match stencil.D"
        return self._stencil_entries.index(direction)

    def inverse_index(self, direction):
        """Returns the position of the inverse (mirrored) direction within the stencil."""
        assert len(direction) == self.D, "direction must match stencil.D"
        direction = ps.stencil.inverse_direction(direction)
        return self._stencil_entries.index(direction)

    def __getitem__(self, index):
        return self._stencil_entries[index]

    def __iter__(self):
        yield from self._stencil_entries

    def __eq__(self, other):
        # Fix: comparing against a non-LBStencil used to raise AttributeError;
        # returning NotImplemented lets Python fall back to its default handling.
        if not isinstance(other, LBStencil):
            return NotImplemented
        return self.ordering == other.ordering and have_same_entries(self._stencil_entries, other.stencil_entries)

    def __len__(self):
        return len(self._stencil_entries)

    def __str__(self):
        return str(self._stencil_entries)

    def __hash__(self):
        # Hash only the entries; stencils differing just in ordering name may collide,
        # which is permitted (equal objects still hash equal).
        return hash(self._stencil_entries)

    def _repr_html_(self):
        """Jupyter HTML representation: one table row per discrete velocity."""
        table = """
        <table style="border:none; width: 100%">
            <tr {nb}>
                <th {nb} >Nr.</th>
                <th {nb} >Direction Name</th>
                <th {nb} >Direction </th>
            </tr>
            {content}
        </table>
        """
        content = ""
        for i, direction in enumerate(self._stencil_entries):
            vals = {
                'nr': sp.latex(i),
                'name': sp.latex(ps.stencil.offset_to_direction_string(direction)),
                'entry': sp.latex(direction),
                'nb': 'style="border:none"'
            }
            content += """<tr {nb}>
                            <td {nb}>${nr}$</td>
                            <td {nb}>${name}$</td>
                            <td {nb}>${entry}$</td>
                         </tr>\n""".format(**vals)
        return table.format(content=content, nb='style="border:none"')
def _predefined_stencils(stencil: str, ordering: str):
predefined_stencils = {
'D2Q9': {
'walberla': ((0, 0),
(0, 1), (0, -1), (-1, 0), (1, 0),
(-1, 1), (1, 1), (-1, -1), (1, -1),),
'counterclockwise': ((0, 0),
(1, 0), (0, 1), (-1, 0), (0, -1),
(1, 1), (-1, 1), (-1, -1), (1, -1)),
'braunschweig': ((0, 0),
(-1, 1), (-1, 0), (-1, -1), (0, -1),
(1, -1), (1, 0), (1, 1), (0, 1)),
'uk': ((0, 0),
(1, 0), (-1, 0), (0, 1), (0, -1),
(1, 1), (-1, -1), (-1, 1), (1, -1),
),
'lehmann': ((0, 0),
(1, 0), (-1, 0), (0, 1), (0, -1),
(1, 1), (-1, -1), (1, -1), (-1, 1),
)
},
'D2V17': {
'walberla': (
(0, 0), (0, -1), (-1, 0), (1, 0), (0, 1), (-1, -1), (1, -1), (-1, 1), (1, 1), (-2, -2), (2, -2),
(-2, 2),
(2, 2), (0, -3), (-3, 0), (3, 0), (0, 3)),
},
'D2V37': {
'walberla': (
(0, 0), (0, -1), (-1, 0), (1, 0), (0, 1), (-1, -1), (1, -1), (-1, 1), (1, 1), (0, -2), (-2, 0),
(2, 0),
(0, 2), (-1, -2), (1, -2), (-2, -1), (2, -1), (-2, 1), (2, 1), (-1, 2), (1, 2), (-2, -2), (2, -2),
(-2, 2),
(2, 2), (0, -3), (-3, 0), (3, 0), (0, 3), (-1, -3), (1, -3), (-3, -1), (3, -1), (-3, 1), (3, 1),
(-1, 3),
(1, 3))
},
'D3Q7': {
'walberla': ((0, 0, 0),
(0, 1, 0), (0, -1, 0),
(-1, 0, 0), (1, 0, 0),
(0, 0, 1), (0, 0, -1))
},
'D3Q15': {
'walberla':
((0, 0, 0),
(0, 1, 0), (0, -1, 0), (-1, 0, 0), (1, 0, 0), (0, 0, 1), (0, 0, -1),
(1, 1, 1), (-1, 1, 1), (1, -1, 1), (-1, -1, 1), (1, 1, -1), (-1, 1, -1), (1, -1, -1),
(-1, -1, -1)),
'premnath': ((0, 0, 0),
(1, 0, 0), (-1, 0, 0), (0, 1, 0), (0, -1, 0), (0, 0, 1), (0, 0, -1),
(1, 1, 1), (-1, 1, 1), (1, -1, 1), (-1, -1, 1),
(1, 1, -1), (-1, 1, -1), (1, -1, -1), (-1, -1, -1)),
'lehmann': ((0, 0, 0),
(1, 0, 0), (-1, 0, 0), (0, 1, 0), (0, -1, 0), (0, 0, 1), (0, 0, -1),
(1, 1, 1), (-1, -1, -1), (1, 1, -1), (-1, -1, 1),
(1, -1, 1), (-1, 1, -1), (-1, 1, 1), (1, -1, -1)),
'fakhari': ((0, 0, 0),
(1, 0, 0), (-1, 0, 0), (0, 1, 0), (0, -1, 0), (0, 0, 1), (0, 0, -1),
(1, 1, 1), (-1, -1, -1), (-1, 1, 1), (1, -1, -1),
(1, -1, 1), (-1, 1, -1), (1, 1, -1), (-1, -1, 1)),
},
'D3Q19': {
'walberla': ((0, 0, 0),
(0, 1, 0), (0, -1, 0), (-1, 0, 0), (1, 0, 0), (0, 0, 1), (0, 0, -1),
(-1, 1, 0), (1, 1, 0), (-1, -1, 0), (1, -1, 0),
(0, 1, 1), (0, -1, 1), (-1, 0, 1), (1, 0, 1),
(0, 1, -1), (0, -1, -1), (-1, 0, -1), (1, 0, -1)),
'counterclockwise': ((0, 0, 0),
(1, 0, 0), (-1, 0, 0), (0, 1, 0), (0, -1, 0), (0, 0, 1), (0, 0, -1),
(1, 1, 0), (-1, -1, 0), (1, 0, 1), (-1, 0, -1),
(0, 1, 1), (0, -1, -1), (1, -1, 0), (-1, 1, 0),
(1, 0, -1), (-1, 0, 1), (0, 1, -1), (0, -1, 1)),
'braunschweig': ((0, 0, 0),
(1, 0, 0), (-1, 0, 0), (0, 1, 0), (0, -1, 0), (0, 0, 1), (0, 0, -1),
(1, 1, 0), (-1, -1, 0), (1, -1, 0), (-1, 1, 0),
(1, 0, 1), (-1, 0, -1), (1, 0, -1), (-1, 0, 1),
(0, 1, 1), (0, -1, -1), (0, 1, -1), (0, -1, 1)),
'premnath': ((0, 0, 0),
(1, 0, 0), (-1, 0, 0), (0, 1, 0), (0, -1, 0), (0, 0, 1), (0, 0, -1),
(1, 1, 0), (-1, 1, 0), (1, -1, 0), (-1, -1, 0),
(1, 0, 1), (-1, 0, 1), (1, 0, -1), (-1, 0, -1),
(0, 1, 1), (0, -1, 1), (0, 1, -1), (0, -1, -1)),
'lehmann': ((0, 0, 0),
(1, 0, 0), (-1, 0, 0), (0, 1, 0), (0, -1, 0), (0, 0, 1), (0, 0, -1),
(1, 1, 0), (-1, -1, 0), (1, 0, 1), (-1, 0, -1),
(0, 1, 1), (0, -1, -1), (1, -1, 0), (-1, 1, 0),
(1, 0, -1), (-1, 0, 1), (0, 1, -1), (0, -1, 1)),
},
'D3Q27': {
'walberla': ((0, 0, 0),
(0, 1, 0), (0, -1, 0), (-1, 0, 0), (1, 0, 0), (0, 0, 1), (0, 0, -1),
(-1, 1, 0), (1, 1, 0), (-1, -1, 0), (1, -1, 0),
(0, 1, 1), (0, -1, 1), (-1, 0, 1), (1, 0, 1),
(0, 1, -1), (0, -1, -1), (-1, 0, -1), (1, 0, -1),
(1, 1, 1), (-1, 1, 1), (1, -1, 1), (-1, -1, 1), (1, 1, -1), (-1, 1, -1), (1, -1, -1),
(-1, -1, -1)),
'premnath': ((0, 0, 0),
(1, 0, 0), (-1, 0, 0), (0, 1, 0), (0, -1, 0), (0, 0, 1), (0, 0, -1),
(1, 1, 0), (-1, 1, 0), (1, -1, 0), (-1, -1, 0),
(1, 0, 1), (-1, 0, 1), (1, 0, -1), (-1, 0, -1),
(0, 1, 1), (0, -1, 1), (0, 1, -1), (0, -1, -1),
(1, 1, 1), (-1, 1, 1), (1, -1, 1), (-1, -1, 1),
(1, 1, -1), (-1, 1, -1), (1, -1, -1), (-1, -1, -1)),
'fakhari': ((0, 0, 0),
(1, 0, 0), (-1, 0, 0), (0, 1, 0), (0, -1, 0), (0, 0, 1), (0, 0, -1),
(1, 1, 1), (-1, 1, 1), (1, -1, 1), (-1, -1, 1),
(1, 1, -1), (-1, 1, -1), (1, -1, -1), (-1, -1, -1),
(1, 1, 0), (-1, 1, 0), (1, -1, 0), (-1, -1, 0),
(1, 0, 1), (-1, 0, 1), (1, 0, -1), (-1, 0, -1), (0, 1, 1), (0, -1, 1), (0, 1, -1),
(0, -1, -1)),
'lehmann': ((0, 0, 0),
(1, 0, 0), (-1, 0, 0), (0, 1, 0), (0, -1, 0), (0, 0, 1), (0, 0, -1),
(1, 1, 0), (-1, -1, 0), (1, 0, 1), (-1, 0, -1),
(0, 1, 1), (0, -1, -1), (1, -1, 0), (-1, 1, 0),
(1, 0, -1), (-1, 0, 1), (0, 1, -1), (0, -1, 1),
(1, 1, 1), (-1, -1, -1), (1, 1, -1), (-1, -1, 1), (1, -1, 1), (-1, 1, -1), (-1, 1, 1),
(1, -1, -1)),
'braunschweig': ((0, 0, 0),
(1, 0, 0), (-1, 0, 0), (0, 1, 0), (0, -1, 0), (0, 0, 1), (0, 0, -1),
(1, 1, 0), (-1, -1, 0), (1, -1, 0), (-1, 1, 0),
(1, 0, 1), (-1, 0, -1), (1, 0, -1), (-1, 0, 1),
(0, 1, 1), (0, -1, -1), (0, 1, -1), (0, -1, 1),
(1, 1, 1), (-1, 1, 1), (1, -1, 1), (-1, -1, 1), (1, 1, -1), (-1, 1, -1), (1, -1, -1),
(-1, -1, -1)),
}
}
try:
return predefined_stencils[stencil][ordering]
except KeyError:
err_msg = ""
for stencil, ordering_names in predefined_stencils.items():
err_msg += " %s: %s\n" % (stencil, ", ".join(ordering_names.keys()))
raise ValueError("No such stencil available. "
"Available stencils: <stencil_name>( <ordering_names> )\n" + err_msg)
import sympy as sp
from lbmpy.relaxationrates import get_shear_relaxation_rate, relaxation_rate_from_lattice_viscosity, \
lattice_viscosity_from_relaxation_rate
from lbmpy.utils import extract_shear_relaxation_rate, frobenius_norm, second_order_moment_tensor
from pystencils import Assignment
from lbmpy.enums import SubgridScaleModel
def add_sgs_model(collision_rule, subgrid_scale_model: SubgridScaleModel, model_constant=None, omega_output_field=None,
                  eddy_viscosity_field=None):
    r"""Wrapper for SGS models to provide default constants and outsource SGS model handling from creation routines.

    Args:
        collision_rule: collision rule the subgrid-scale model is added to
        subgrid_scale_model: member of lbmpy.enums.SubgridScaleModel selecting the model
        model_constant: model constant; if None, a literature default is used
            (0.12 for Smagorinsky, 1/3 for QR)
        omega_output_field: optional field the adapted relaxation rate is written to
        eddy_viscosity_field: optional field the eddy viscosity is written to

    Returns:
        collision rule with the selected SGS model added

    Raises:
        ValueError: if an unknown subgrid-scale model is passed
    """
    # Fix: test 'is not None' instead of truthiness, so an explicitly supplied
    # constant of 0 is not silently replaced by the default.
    if subgrid_scale_model == SubgridScaleModel.SMAGORINSKY:
        model_constant = model_constant if model_constant is not None else sp.Float(0.12)
        return add_smagorinsky_model(collision_rule=collision_rule, smagorinsky_constant=model_constant,
                                     omega_output_field=omega_output_field, eddy_viscosity_field=eddy_viscosity_field)
    if subgrid_scale_model == SubgridScaleModel.QR:
        model_constant = model_constant if model_constant is not None else sp.Rational(1, 3)
        return add_qr_model(collision_rule=collision_rule, qr_constant=model_constant,
                            omega_output_field=omega_output_field, eddy_viscosity_field=eddy_viscosity_field)
    # Fix: previously an unknown model fell through and silently returned None.
    raise ValueError(f"Unknown subgrid-scale model: {subgrid_scale_model}")
def add_smagorinsky_model(collision_rule, smagorinsky_constant, omega_output_field=None, eddy_viscosity_field=None):
    r"""Adds a Smagorinsky model to a lattice Boltzmann collision rule.

    To add the Smagorinsky model to a LB scheme one has to first compute the strain rate tensor
    :math:`S_{ij}` in each cell, and compute the turbulent viscosity :math:`\nu_t` from it. Then
    the local relaxation rate has to be adapted to match the total viscosity :math:`\nu_{total}`
    instead of the standard viscosity :math:`\nu_0`.

    A fortunate property of LB methods is, that the strain rate tensor can be computed locally from the
    non-equilibrium part of the distribution function. This is somewhat surprising, since the strain rate tensor
    contains first order derivatives. The strain rate tensor can be obtained by

    .. math ::
        S_{ij} = - \frac{3 \omega_s}{2 \rho_{(0)}} \Pi_{ij}^{(neq)}

    where :math:`\omega_s` is the relaxation rate that determines the viscosity, :math:`\rho_{(0)}` is
    :math:`\rho` in compressible models and :math:`1` for incompressible schemes.
    :math:`\Pi_{ij}^{(neq)}` is the second order moment tensor of the non-equilibrium part of
    the distribution functions :math:`f^{(neq)} = f - f^{(eq)}` and can be computed as

    .. math ::
        \Pi_{ij}^{(neq)} = \sum_q c_{qi} c_{qj} \; f_q^{(neq)}

    Args:
        collision_rule: collision rule whose shear relaxation rate is replaced by the adapted one
        smagorinsky_constant: the Smagorinsky model constant
        omega_output_field: optional field the adapted relaxation rate is written to
        eddy_viscosity_field: optional field the eddy viscosity is written to

    Returns:
        collision rule with the Smagorinsky model added

    Raises:
        ValueError: if the shear relaxation rate is neither a symbol nor assigned in a single equation
    """
    method = collision_rule.method
    omega_s = get_shear_relaxation_rate(method)
    omega_s, found_symbolic_shear_relaxation = extract_shear_relaxation_rate(collision_rule, omega_s)
    if not found_symbolic_shear_relaxation:
        raise ValueError("For the Smagorinsky model the shear relaxation rate has to be a symbol or it has to be "
                         "assigned to a single equation in the assignment list")
    # non-equilibrium part of the pre-collision populations: f - f^(eq)
    f_neq = sp.Matrix(method.pre_collision_pdf_symbols) - method.get_equilibrium_terms()
    equilibrium = method.equilibrium_distribution
    tau_0 = sp.Symbol("tau_0_")
    second_order_neq_moments = sp.Symbol("Pi")
    # compressible models scale by the local density, incompressible ones by the background density
    rho = equilibrium.density if equilibrium.compressible else equilibrium.background_density
    adapted_omega = sp.Symbol("sgs_omega")
    # replace the shear relaxation rate everywhere on the rhs by the adapted (turbulent) one
    collision_rule = collision_rule.new_with_substitutions({omega_s: adapted_omega}, substitute_on_lhs=False)
    # for derivation see notebook demo_custom_LES_model.ipynb
    eqs = [Assignment(tau_0, sp.Float(1) / omega_s),
           Assignment(second_order_neq_moments,
                      frobenius_norm(second_order_moment_tensor(f_neq, method.stencil), factor=2) / rho),
           Assignment(adapted_omega,
                      sp.Float(2) / (tau_0 + sp.sqrt(
                          sp.Float(18) * smagorinsky_constant ** 2 * second_order_neq_moments + tau_0 ** 2)))]
    collision_rule.subexpressions += eqs
    # the new subexpressions reference each other, so re-establish a valid evaluation order
    collision_rule.topological_sort(sort_subexpressions=True, sort_main_assignments=False)
    if eddy_viscosity_field:
        collision_rule.main_assignments.append(Assignment(
            eddy_viscosity_field.center,
            smagorinsky_constant ** 2 * sp.Rational(3, 2) * adapted_omega * second_order_neq_moments
        ))
    if omega_output_field:
        collision_rule.main_assignments.append(Assignment(omega_output_field.center, adapted_omega))
    return collision_rule
def add_qr_model(collision_rule, qr_constant, omega_output_field=None, eddy_viscosity_field=None):
    r"""Adds a QR model to a lattice Boltzmann collision rule, see :cite:`rozema15`.

    WARNING : this subgrid-scale model is only defined for isotropic grids

    Args:
        collision_rule: collision rule whose shear relaxation rate is replaced by the adapted one
        qr_constant: the QR model constant
        omega_output_field: optional field the adapted relaxation rate is written to
        eddy_viscosity_field: optional field the eddy viscosity is written to

    Returns:
        collision rule with the QR model added

    Raises:
        ValueError: if the shear relaxation rate cannot be extracted, or if the
            stencil dimension is neither 2 nor 3
    """
    method = collision_rule.method
    omega_s = get_shear_relaxation_rate(method)
    omega_s, found_symbolic_shear_relaxation = extract_shear_relaxation_rate(collision_rule, omega_s)
    if not found_symbolic_shear_relaxation:
        raise ValueError("For the QR model the shear relaxation rate has to be a symbol or it has to be "
                         "assigned to a single equation in the assignment list")
    # non-equilibrium part of the pre-collision populations: f - f^(eq)
    f_neq = sp.Matrix(method.pre_collision_pdf_symbols) - method.get_equilibrium_terms()
    equilibrium = method.equilibrium_distribution
    nu_0, nu_e = sp.symbols("qr_nu_0 qr_nu_e")  # base (molecular) and eddy viscosity
    c_pi_s = sp.Symbol("qr_c_pi")
    # compressible models scale by the local density, incompressible ones by the background density
    rho = equilibrium.density if equilibrium.compressible else equilibrium.background_density
    adapted_omega = sp.Symbol("sgs_omega")
    stencil = method.stencil
    pi = second_order_moment_tensor(f_neq, stencil)
    r_prime = sp.Symbol("qr_r_prime")
    q_prime = sp.Symbol("qr_q_prime")
    # model coefficient: only a positive r' contributes (clamped to zero otherwise)
    c_pi = qr_constant * sp.Piecewise(
        (r_prime, sp.StrictGreaterThan(r_prime, sp.Float(0))),
        (sp.Float(0), True)
    ) / q_prime
    # replace the shear relaxation rate everywhere on the rhs by the adapted (turbulent) one
    collision_rule = collision_rule.new_with_substitutions({omega_s: adapted_omega}, substitute_on_lhs=False)
    if stencil.D == 2:
        nu_e_assignments = [Assignment(nu_e, c_pi_s)]
    elif stencil.D == 3:
        base_viscosity = sp.Symbol("qr_base_viscosity")
        nu_e_assignments = [
            Assignment(base_viscosity, sp.Float(6) * nu_0 + sp.Float(1)),
            Assignment(nu_e, (-base_viscosity + sp.sqrt(base_viscosity ** 2 + sp.Float(72) * c_pi_s / rho))
                       / sp.Float(12))
        ]
    else:
        raise ValueError("QR-model is only defined for 2- or 3-dimensional flows")
    # name the entries of Pi so each is computed once and reused in det/trace below
    matrix_entries = sp.Matrix(stencil.D, stencil.D, sp.symbols(f"sgs_qr_pi:{stencil.D ** 2}"))
    eqs = [Assignment(nu_0, lattice_viscosity_from_relaxation_rate(omega_s)),
           *[Assignment(matrix_entries[i], pi[i]) for i in range(stencil.D ** 2)],
           Assignment(r_prime, sp.Float(-1) ** (stencil.D + 1) * matrix_entries.det()),
           Assignment(q_prime, sp.Rational(1, 2) * (matrix_entries * matrix_entries).trace()),
           Assignment(c_pi_s, c_pi),
           *nu_e_assignments,
           Assignment(adapted_omega, relaxation_rate_from_lattice_viscosity(nu_0 + nu_e))]
    collision_rule.subexpressions += eqs
    # the new subexpressions reference each other, so re-establish a valid evaluation order
    collision_rule.topological_sort(sort_subexpressions=True, sort_main_assignments=False)
    if eddy_viscosity_field:
        collision_rule.main_assignments.append(Assignment(eddy_viscosity_field.center, nu_e))
    if omega_output_field:
        collision_rule.main_assignments.append(Assignment(omega_output_field.center, adapted_omega))
    return collision_rule
import numpy as np
import sympy as sp
from lbmpy.fieldaccess import StreamPullTwoFieldsAccessor
from lbmpy.methods.abstractlbmethod import LbmCollisionRule
from pystencils import Assignment, AssignmentCollection, Field
from pystencils import Assignment, AssignmentCollection
from pystencils.field import create_numpy_array_with_layout, layout_string_to_tuple
from pystencils.simp import add_subexpressions_for_field_reads
from pystencils.sympyextensions import fast_subs
......@@ -11,30 +10,34 @@ from pystencils.sympyextensions import fast_subs
# -------------------------------------------- LBM Kernel Creation -----------------------------------------------------
def create_lbm_kernel(collision_rule, input_field, output_field, accessor):
def create_lbm_kernel(collision_rule, src_field, dst_field=None, accessor=StreamPullTwoFieldsAccessor(),
data_type=None):
"""Replaces the pre- and post collision symbols in the collision rule by field accesses.
Args:
collision_rule: instance of LbmCollisionRule, defining the collision step
input_field: field used for reading pdf values
output_field: field used for writing pdf values
if accessor.is_inplace this parameter is ignored
src_field: field used for reading pdf values
dst_field: field used for writing pdf values if accessor.is_inplace this parameter is ignored
accessor: instance of PdfFieldAccessor, defining where to read and write values
to create e.g. a fused stream-collide kernel
to create e.g. a fused stream-collide kernel See 'fieldaccess.PdfFieldAccessor'
data_type: If a datatype is given the field reads are isolated and written to TypedSymbols of type data_type
Returns:
LbmCollisionRule where pre- and post collision symbols have been replaced
"""
if accessor.is_inplace:
output_field = input_field
dst_field = src_field
if not accessor.is_inplace and dst_field is None:
raise ValueError("For two field accessors a destination field has to be provided")
method = collision_rule.method
pre_collision_symbols = method.pre_collision_pdf_symbols
post_collision_symbols = method.post_collision_pdf_symbols
substitutions = {}
input_accesses = accessor.read(input_field, method.stencil)
output_accesses = accessor.write(output_field, method.stencil)
input_accesses = accessor.read(src_field, method.stencil)
output_accesses = accessor.write(dst_field, method.stencil)
for (idx, offset), input_access, output_access in zip(enumerate(method.stencil), input_accesses, output_accesses):
substitutions[pre_collision_symbols[idx]] = input_access
......@@ -48,41 +51,67 @@ def create_lbm_kernel(collision_rule, input_field, output_field, accessor):
new_split_groups.append([fast_subs(e, substitutions) for e in split_group])
result.simplification_hints['split_groups'] = new_split_groups
if accessor.is_inplace:
result = add_subexpressions_for_field_reads(result, subexpressions=True, main_assignments=True)
if accessor.is_inplace or (data_type is not None and src_field.dtype != data_type):
result = add_subexpressions_for_field_reads(result, subexpressions=True, main_assignments=True,
data_type=data_type)
return result
def create_stream_pull_only_kernel(stencil, numpy_arr=None, src_field_name="src", dst_field_name="dst",
generic_layout='numpy', generic_field_type=np.float64):
"""Creates a stream-pull kernel, without collision.
def create_stream_only_kernel(stencil, src_field, dst_field=None, accessor=StreamPullTwoFieldsAccessor()):
"""Creates a stream kernel, without collision.
For parameters see function ``create_stream_pull_collide_kernel``
Args:
stencil: lattice Boltzmann stencil which is used in the form of a tuple of tuples
src_field: field used for reading pdf values
dst_field: field used for writing pdf values if accessor.is_inplace this parameter is not necessary but it
is used if provided
accessor: instance of PdfFieldAccessor, defining where to read and write values
to create e.g. a fused stream-collide kernel See 'fieldaccess.PdfFieldAccessor'
Returns:
AssignmentCollection of the stream only update rule
"""
if accessor.is_inplace and dst_field is None:
dst_field = src_field
if not accessor.is_inplace and dst_field is None:
raise ValueError("For two field accessors a destination field has to be provided")
dim = len(stencil[0])
if numpy_arr is None:
src = Field.create_generic(src_field_name, dim, index_shape=(len(stencil),),
layout=generic_layout, dtype=generic_field_type)
dst = Field.create_generic(dst_field_name, dim, index_shape=(len(stencil),),
layout=generic_layout, dtype=generic_field_type)
else:
src = Field.create_from_numpy_array(src_field_name, numpy_arr, index_dimensions=1)
dst = Field.create_from_numpy_array(dst_field_name, numpy_arr, index_dimensions=1)
temporary_symbols = sp.symbols(f'streamed_:{stencil.Q}')
subexpressions = [Assignment(tmp, acc) for tmp, acc in zip(temporary_symbols, accessor.read(src_field, stencil))]
main_assignments = [Assignment(acc, tmp) for acc, tmp in zip(accessor.write(dst_field, stencil), temporary_symbols)]
return AssignmentCollection(main_assignments, subexpressions=subexpressions)
accessor = StreamPullTwoFieldsAccessor()
eqs = [Assignment(a, b) for a, b in zip(accessor.write(dst, stencil), accessor.read(src, stencil))]
return AssignmentCollection(eqs, [])
def create_stream_pull_with_output_kernel(lb_method, src_field, dst_field=None, output=None,
accessor=StreamPullTwoFieldsAccessor()):
"""Creates a stream kernel, without collision, but macroscopic quantities like density or velocity can be calculated.
Args:
lb_method: lattice Boltzmann method see 'creationfunctions.create_lb_method'
src_field: field used for reading pdf values
dst_field: field used for writing pdf values if accessor.is_inplace this parameter is ignored
output: dictionary which contains macroscopic quantities as keys which should be calculated and fields as
values which should be used to write the data e.g.: {'density': density_field}
accessor: instance of PdfFieldAccessor, defining where to read and write values
to create e.g. a fused stream-collide kernel See 'fieldaccess.PdfFieldAccessor'
Returns:
AssignmentCollection of the stream only update rule
"""
if accessor.is_inplace:
dst_field = src_field
if not accessor.is_inplace and dst_field is None:
raise ValueError("For two field accessors a destination field has to be provided")
def create_stream_pull_with_output_kernel(lb_method, src_field, dst_field, output):
stencil = lb_method.stencil
cqc = lb_method.conserved_quantity_computation
streamed = sp.symbols("streamed_:%d" % (len(stencil),))
accessor = StreamPullTwoFieldsAccessor()
streamed = sp.symbols(f"streamed_:{stencil.Q}")
stream_assignments = [Assignment(a, b) for a, b in zip(streamed, accessor.read(src_field, stencil))]
output_eq_collection = cqc.output_equations_from_pdfs(streamed, output)
output_eq_collection = cqc.output_equations_from_pdfs(streamed, output) if output\
else AssignmentCollection(main_assignments=[])
write_eqs = [Assignment(a, b) for a, b in zip(accessor.write(dst_field, stencil), streamed)]
subexpressions = stream_assignments + output_eq_collection.subexpressions
......@@ -90,6 +119,27 @@ def create_stream_pull_with_output_kernel(lb_method, src_field, dst_field, outpu
return LbmCollisionRule(lb_method, main_eqs, subexpressions,
simplification_hints=output_eq_collection.simplification_hints)
def create_copy_kernel(stencil, src_field, dst_field, accessor=StreamPullTwoFieldsAccessor()):
    """Creates a copy kernel, which can be used to transfer information from src to dst field.

    Args:
        stencil: lattice Boltzmann stencil which is used in the form of a tuple of tuples
        src_field: field used for reading pdf values
        dst_field: field used for writing pdf values
        accessor: instance of PdfFieldAccessor, defining where to read and write values
                  to create e.g. a fused stream-collide kernel. See 'fieldaccess.PdfFieldAccessor'

    Returns:
        AssignmentCollection of a copy update rule
    """
    buffer_symbols = sp.symbols(f'copied:{stencil.Q}')
    # Stage the source values (at the accessor's write locations) into temporaries ...
    read_accesses = accessor.write(src_field, stencil)
    write_accesses = accessor.write(dst_field, stencil)
    subexpressions = [Assignment(sym, access) for sym, access in zip(buffer_symbols, read_accesses)]
    # ... then emit them at the corresponding locations of the destination field.
    main_assignments = [Assignment(access, sym) for access, sym in zip(write_accesses, buffer_symbols)]
    return AssignmentCollection(main_assignments, subexpressions=subexpressions)
# ---------------------------------- Pdf array creation for various layouts --------------------------------------------
......
import sympy as sp
def second_order_moment_tensor(function_values, stencil):
    """Returns (D x D) Matrix of second order moments of the given function where D is the dimension"""
    assert len(function_values) == stencil.Q

    def moment_entry(i, j):
        # Second moment: sum over all directions of f * c_i * c_j.
        return sum(f * c[i] * c[j] for f, c in zip(function_values, stencil))

    return sp.Matrix(stencil.D, stencil.D, moment_entry)
def frobenius_norm(matrix, factor=1):
    """Computes the Frobenius norm of a matrix defined as the square root of the sum of squared matrix elements
    The optional factor is added inside the square root"""
    squared_element_sum = sum(element * element for element in matrix)
    return sp.sqrt(factor * squared_element_sum)
def extract_shear_relaxation_rate(collision_rule, shear_relaxation_rate):
    """Searches for the shear relaxation rate in the collision equations.
    If the shear relaxation rate is assigned to a single assignment its lhs is returned.
    Otherwise, the shear relaxation rate has to be a sympy symbol, or it can not be extracted"""
    if not isinstance(shear_relaxation_rate, (float, int)):
        # Already symbolic: nothing to search for.
        return shear_relaxation_rate, True
    found = False
    current = shear_relaxation_rate
    for assignment in collision_rule.all_assignments:
        # Note: 'current' is rebound on a match, so chains of assignments
        # (a = 1.5; b = a) are followed to the last lhs.
        if assignment.rhs == current:
            found = True
            current = assignment.lhs
    return current, found
Source diff could not be displayed: it is too large. Options to address this: view the blob.
import numpy as np
from lbmpy.advanced_streaming import Timestep
from lbmpy.boundaries import NoSlip
from lbmpy.boundaries.boundaryhandling import create_lattice_boltzmann_boundary_kernel
from lbmpy.advanced_streaming.utility import streaming_patterns, inverse_dir_index, AccessPdfValues
from lbmpy.enums import Method, Stencil
from lbmpy.creationfunctions import create_lb_method, LBMConfig
from lbmpy.stencils import LBStencil
import pystencils as ps
from pystencils.boundaries.createindexlist import numpy_data_type_for_boundary_object
from pystencils import TypedSymbol
from pystencils.field import Field, FieldType
import pytest
@pytest.mark.parametrize("stencil", [Stencil.D2Q9, Stencil.D3Q19, Stencil.D3Q27])
@pytest.mark.parametrize("streaming_pattern", streaming_patterns)
@pytest.mark.parametrize("prev_timestep", [Timestep.EVEN, Timestep.ODD])
def test_advanced_streaming_noslip_single_cell(stencil, streaming_pattern, prev_timestep):
    """
    Advanced Streaming NoSlip Test

    Writes a unique value per direction into the single interior cell of a 3^D
    domain, runs the NoSlip boundary kernel for all directions and checks that
    every population was bounced back into its inverse direction.
    """
    stencil = LBStencil(stencil)
    pdf_field = ps.fields(f'pdfs({stencil.Q}): [{stencil.D}D]')
    # Accessors for the pre-boundary (written 'out') and post-boundary (read 'in')
    # pdf storage locations of the chosen streaming pattern.
    prev_pdf_access = AccessPdfValues(stencil, streaming_pattern, prev_timestep, 'out')
    next_pdf_access = AccessPdfValues(stencil, streaming_pattern, prev_timestep.next(), 'in')
    pdfs = np.zeros((3,) * stencil.D + (stencil.Q,))
    # The single interior cell of the 3^D array.
    pos = (1,) * stencil.D
    for d in range(stencil.Q):
        # Mark each outgoing population with its own direction index.
        prev_pdf_access.write_pdf(pdfs, pos, d, d)
    lbm_config = LBMConfig(stencil=stencil, method=Method.SRT)
    lb_method = create_lb_method(lbm_config=lbm_config)
    noslip = NoSlip()
    index_struct_dtype = numpy_data_type_for_boundary_object(noslip, stencil.D)
    index_field = Field('indexVector', FieldType.INDEXED, index_struct_dtype, layout=[0],
                        shape=(TypedSymbol("indexVectorSize", np.int32), 1), strides=(1, 1))
    # One boundary index entry per direction, all referring to the interior cell.
    index_vector = np.array([pos + (d,) for d in range(stencil.Q)], dtype=index_struct_dtype)
    ast = create_lattice_boltzmann_boundary_kernel(pdf_field,
                                                   index_field, lb_method, noslip,
                                                   prev_timestep=prev_timestep,
                                                   streaming_pattern=streaming_pattern)
    flex_kernel = ast.compile()
    flex_kernel(pdfs=pdfs, indexVector=index_vector, indexVectorSize=len(index_vector))
    # After bounce-back, reading direction d must yield the value that was
    # written into its inverse direction.
    reflected_pdfs = [next_pdf_access.read_pdf(pdfs, pos, d) for d in range(stencil.Q)]
    inverse_pdfs = [inverse_dir_index(stencil, d) for d in range(stencil.Q)]
    assert reflected_pdfs == inverse_pdfs
import pystencils as ps
import numpy as np
from lbmpy.stencils import LBStencil
from pystencils.slicing import get_slice_before_ghost_layer, get_ghost_region_slice
from lbmpy.creationfunctions import create_lb_update_rule, LBMConfig, LBMOptimisation
from lbmpy.advanced_streaming.communication import (
get_communication_slices,
_fix_length_one_slices,
LBMPeriodicityHandling,
periodic_pdf_gpu_copy_kernel,
)
from lbmpy.advanced_streaming.utility import streaming_patterns, Timestep
from lbmpy.enums import Stencil
import pytest
@pytest.mark.parametrize(
    "stencil", [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27]
)
@pytest.mark.parametrize("streaming_pattern", streaming_patterns)
@pytest.mark.parametrize("timestep", [Timestep.EVEN, Timestep.ODD])
def test_slices_not_empty(stencil, streaming_pattern, timestep):
    """Every communication slice must address a non-empty region of the pdf array."""
    stencil = LBStencil(stencil)
    pdf_arr = np.zeros((4,) * stencil.D + (stencil.Q,))
    comm_slices = get_communication_slices(
        stencil,
        streaming_pattern=streaming_pattern,
        prev_timestep=timestep,
        ghost_layers=1,
    )
    for slice_pairs in comm_slices.values():
        for src_slice, dst_slice in slice_pairs:
            assert 0 not in pdf_arr[src_slice].shape
            assert 0 not in pdf_arr[dst_slice].shape
@pytest.mark.parametrize(
    "stencil", [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27]
)
@pytest.mark.parametrize("streaming_pattern", streaming_patterns)
@pytest.mark.parametrize("timestep", [Timestep.EVEN, Timestep.ODD])
def test_src_dst_same_shape(stencil, streaming_pattern, timestep):
    """Source and destination of every communication slice pair must select
    regions of identical shape, otherwise the copy would be ill-defined."""
    stencil = LBStencil(stencil)
    pdf_arr = np.zeros((4,) * stencil.D + (stencil.Q,))
    comm_slices = get_communication_slices(
        stencil,
        streaming_pattern=streaming_pattern,
        prev_timestep=timestep,
        ghost_layers=1,
    )
    for slice_pairs in comm_slices.values():
        for src_slice, dst_slice in slice_pairs:
            assert pdf_arr[src_slice].shape == pdf_arr[dst_slice].shape
@pytest.mark.parametrize(
    "stencil", [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27]
)
def test_pull_communication_slices(stencil):
    """For the pull pattern, each direction's communication slice must equal the
    classic rule: copy the slice before the ghost layer into the ghost region
    of the opposite direction."""
    stencil = LBStencil(stencil)
    slices = get_communication_slices(
        stencil, streaming_pattern="pull", prev_timestep=Timestep.BOTH, ghost_layers=1
    )
    for i, d in enumerate(stencil):
        if i == 0:
            # Skip the center (rest) population - it is never communicated.
            continue
        for s in slices[d]:
            if s[0][-1] == i:
                # The last slice entry selects the pdf index; strip it so only
                # the spatial part of the slices is compared below.
                src = s[0][:-1]
                dst = s[1][:-1]
                break
        inner_slice = _fix_length_one_slices(
            get_slice_before_ghost_layer(d, ghost_layers=1)
        )
        inv_dir = (-e for e in d)
        gl_slice = _fix_length_one_slices(
            get_ghost_region_slice(inv_dir, ghost_layers=1)
        )
        assert src == inner_slice
        assert dst == gl_slice
@pytest.mark.parametrize("direction", LBStencil(Stencil.D3Q27).stencil_entries)
@pytest.mark.parametrize("pull", [False, True])
def test_gpu_comm_kernels(direction: tuple, pull: bool):
    """The GPU periodicity copy kernel must transfer the marked source slice
    into the destination slice, for every direction and both copy orientations."""
    pytest.importorskip("cupy")
    stencil = LBStencil(Stencil.D3Q27)
    inv_dir = stencil[stencil.inverse_index(direction)]
    target = ps.Target.GPU
    domain_size = (4,) * stencil.D
    dh: ps.datahandling.SerialDataHandling = ps.create_data_handling(
        domain_size,
        periodicity=(True,) * stencil.D,
        parallel=False,
        default_target=target,
    )
    field = dh.add_array("field", values_per_cell=2)
    if pull:
        # Pull: read the interior slice, write into the opposite ghost region.
        dst_slice = get_ghost_region_slice(inv_dir)
        src_slice = get_slice_before_ghost_layer(direction)
    else:
        # Push: read the ghost region, write into the interior slice.
        dst_slice = get_slice_before_ghost_layer(direction)
        src_slice = get_ghost_region_slice(inv_dir)
    # Restrict both slices to the second field component to also verify that
    # the index dimension is handled correctly.
    src_slice += (1,)
    dst_slice += (1,)
    kernel = periodic_pdf_gpu_copy_kernel(field, src_slice, dst_slice)
    dh.cpu_arrays[field.name][src_slice] = 42.0
    dh.all_to_gpu()
    dh.run_kernel(kernel)
    dh.all_to_cpu()
    np.testing.assert_equal(dh.cpu_arrays[field.name][dst_slice], 42.0)
@pytest.mark.parametrize("stencil", [Stencil.D2Q9, Stencil.D3Q19])
@pytest.mark.parametrize("streaming_pattern", streaming_patterns)
def test_direct_copy_and_kernels_equivalence(stencil: Stencil, streaming_pattern: str):
    """Periodicity handling via cupy direct device copies and via generated copy
    kernels must produce identical pdf arrays."""
    pytest.importorskip("cupy")
    target = ps.Target.GPU
    stencil = LBStencil(stencil)
    domain_size = (4,) * stencil.D
    dh: ps.datahandling.SerialDataHandling = ps.create_data_handling(
        domain_size,
        periodicity=(True,) * stencil.D,
        parallel=False,
        default_target=target,
    )
    pdfs_a = dh.add_array("pdfs_a", values_per_cell=stencil.Q)
    pdfs_b = dh.add_array("pdfs_b", values_per_cell=stencil.Q)
    dh.fill(pdfs_a.name, 0.0, ghost_layers=True)
    dh.fill(pdfs_b.name, 0.0, ghost_layers=True)
    # Initialize both arrays identically: every pdf component carries its own index.
    for q in range(stencil.Q):
        sl = ps.make_slice[:4, :4, q] if stencil.D == 2 else ps.make_slice[:4, :4, :4, q]
        dh.cpu_arrays[pdfs_a.name][sl] = q
        dh.cpu_arrays[pdfs_b.name][sl] = q
    dh.all_to_gpu()
    # Same periodicity handling, once with direct cupy copies, once with kernels.
    direct_copy = LBMPeriodicityHandling(stencil, dh, pdfs_a.name, streaming_pattern, cupy_direct_copy=True)
    kernels_copy = LBMPeriodicityHandling(stencil, dh, pdfs_b.name, streaming_pattern, cupy_direct_copy=False)
    direct_copy(Timestep.EVEN)
    kernels_copy(Timestep.EVEN)
    dh.all_to_cpu()
    np.testing.assert_equal(
        dh.cpu_arrays[pdfs_a.name],
        dh.cpu_arrays[pdfs_b.name]
    )
@pytest.mark.parametrize(
    "stencil_name", [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27]
)
def test_optimised_and_full_communication_equivalence(stencil_name):
    """Streaming after the optimised (slice-based) periodicity handling must give
    exactly the same pdf field as streaming after a full ghost-layer sync."""
    target = ps.Target.CPU
    stencil = LBStencil(stencil_name)
    domain_size = (4,) * stencil.D
    dh = ps.create_data_handling(
        domain_size,
        periodicity=(True,) * stencil.D,
        parallel=False,
        default_target=target,
    )
    pdf = dh.add_array("pdf", values_per_cell=len(stencil), dtype=np.int64)
    dh.fill("pdf", 0, ghost_layers=True)
    pdf_tmp = dh.add_array("pdf_tmp", values_per_cell=len(stencil), dtype=np.int64)
    dh.fill("pdf_tmp", 0, ghost_layers=True)
    gl = dh.ghost_layers_of_field("pdf")
    # Fill both arrays with a unique integer per entry so any mix-up is visible.
    num = 0
    for idx, x in np.ndenumerate(dh.cpu_arrays["pdf"]):
        dh.cpu_arrays["pdf"][idx] = num
        dh.cpu_arrays["pdf_tmp"][idx] = num
        num += 1
    lbm_config = LBMConfig(stencil=stencil, kernel_type="stream_pull_only")
    lbm_opt = LBMOptimisation(symbolic_field=pdf, symbolic_temporary_field=pdf_tmp)
    config = ps.CreateKernelConfig(target=dh.default_target, data_type=np.int64, cpu_openmp=True)
    ac = create_lb_update_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
    ast = ps.create_kernel(ac, config=config)
    stream = ast.compile()
    # Reference run: full ghost-layer synchronization, then one stream step.
    full_communication = dh.synchronization_function(
        pdf.name, target=dh.default_target, optimization={"openmp": True}
    )
    full_communication()
    dh.run_kernel(stream)
    dh.swap("pdf", "pdf_tmp")
    pdf_full_communication = np.copy(dh.cpu_arrays["pdf"])
    # Reset both arrays to the identical initial state.
    num = 0
    for idx, x in np.ndenumerate(dh.cpu_arrays["pdf"]):
        dh.cpu_arrays["pdf"][idx] = num
        dh.cpu_arrays["pdf_tmp"][idx] = num
        num += 1
    # Candidate run: optimised slice-wise periodicity handling, then the same stream step.
    optimised_communication = LBMPeriodicityHandling(
        stencil=stencil,
        data_handling=dh,
        pdf_field_name=pdf.name,
        streaming_pattern="pull",
    )
    optimised_communication()
    dh.run_kernel(stream)
    dh.swap("pdf", "pdf_tmp")
    # Compare every interior cell and pdf component of both results.
    if stencil.D == 3:
        for i in range(gl, domain_size[0]):
            for j in range(gl, domain_size[1]):
                for k in range(gl, domain_size[2]):
                    for f in range(len(stencil)):
                        assert (
                            dh.cpu_arrays["pdf"][i, j, k, f]
                            == pdf_full_communication[i, j, k, f]
                        ), print(f)
    else:
        for i in range(gl, domain_size[0]):
            for j in range(gl, domain_size[1]):
                for f in range(len(stencil)):
                    assert (
                        dh.cpu_arrays["pdf"][i, j, f] == pdf_full_communication[i, j, f]
                    )
import numpy as np
from dataclasses import replace
from pystencils.datahandling import create_data_handling
from pystencils import create_kernel, Target, CreateKernelConfig
from lbmpy.creationfunctions import create_lb_collision_rule, create_lb_function, LBMConfig, LBMOptimisation
from lbmpy.enums import Method, Stencil
from lbmpy.macroscopic_value_kernels import macroscopic_values_getter, macroscopic_values_setter
from lbmpy.stencils import LBStencil
from lbmpy.advanced_streaming import LBMPeriodicityHandling
from lbmpy.advanced_streaming.utility import is_inplace, streaming_patterns, get_timesteps
import pytest
from numpy.testing import assert_allclose
# Velocity fields of previous parametrizations, keyed by (stencil, streaming_pattern);
# used across test runs to check that all streaming patterns give the same flow.
all_results = dict()
# GPU target is only tested when cupy is importable.
targets = [Target.CPU]
try:
    import cupy
    targets += [Target.GPU]
except Exception:
    # cupy is optional; without it only the CPU target is exercised.
    pass
@pytest.mark.parametrize('target', targets)
@pytest.mark.parametrize('stencil', [Stencil.D2Q9, Stencil.D3Q19, Stencil.D3Q27])
@pytest.mark.parametrize('streaming_pattern', streaming_patterns)
@pytest.mark.longrun
def test_fully_periodic_flow(target, stencil, streaming_pattern):
    """Run a fully periodic, uniform SRT flow for 100 steps.

    The velocity field must stay at its uniform initial value, and all
    streaming patterns must produce the same flow field for a given stencil.
    """
    gpu = False
    if target == Target.GPU:
        gpu = True
    # Stencil
    stencil = LBStencil(stencil)
    # Streaming
    inplace = is_inplace(streaming_pattern)
    timesteps = get_timesteps(streaming_pattern)
    zeroth_timestep = timesteps[0]
    # Data Handling and PDF fields
    domain_size = (30,) * stencil.D
    periodicity = (True,) * stencil.D
    dh = create_data_handling(domain_size=domain_size, periodicity=periodicity, default_target=target)
    pdfs = dh.add_array('pdfs', stencil.Q)
    if not inplace:
        pdfs_tmp = dh.add_array_like('pdfs_tmp', pdfs.name)
    # LBM Streaming and Collision
    lbm_config = LBMConfig(stencil=stencil, method=Method.SRT,
                           relaxation_rate=1.0, streaming_pattern=streaming_pattern)
    lbm_opt = LBMOptimisation(symbolic_field=pdfs)
    config = CreateKernelConfig(target=target)
    if not inplace:
        lbm_opt = replace(lbm_opt, symbolic_temporary_field=pdfs_tmp)
    lb_collision = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt, config=config)
    lb_method = lb_collision.method
    # One kernel per timestep parity (in-place patterns need an even and an odd kernel).
    lb_kernels = []
    for t in timesteps:
        lbm_config = replace(lbm_config, timestep=t)
        lb_kernels.append(create_lb_function(collision_rule=lb_collision,
                                             lbm_config=lbm_config, lbm_optimisation=lbm_opt))
    # Macroscopic Values
    density = 1.0
    density_field = dh.add_array('rho', 1)
    u_x = 0.01
    velocity = (u_x,) * stencil.D
    velocity_field = dh.add_array('u', stencil.D)
    u_ref = np.full(domain_size + (stencil.D,), u_x)
    setter = macroscopic_values_setter(
        lb_method, density, velocity, pdfs,
        streaming_pattern=streaming_pattern, previous_timestep=zeroth_timestep)
    setter_kernel = create_kernel(setter,
                                  config=CreateKernelConfig(target=target, ghost_layers=1)).compile()
    getter_kernels = []
    for t in timesteps:
        getter = macroscopic_values_getter(
            lb_method, density_field, velocity_field, pdfs,
            streaming_pattern=streaming_pattern, previous_timestep=t)
        getter_kernels.append(create_kernel(getter,
                                            config=CreateKernelConfig(target=target, ghost_layers=1)).compile())
    # Periodicity
    periodicity_handler = LBMPeriodicityHandling(stencil, dh, pdfs.name, streaming_pattern=streaming_pattern)
    # Initialization and Timestep
    current_timestep = zeroth_timestep

    def init():
        # 'nonlocal' (not 'global') so the closures mutate this test's own
        # timestep state instead of silently creating a module-level variable.
        nonlocal current_timestep
        current_timestep = zeroth_timestep
        dh.run_kernel(setter_kernel)

    def one_step():
        nonlocal current_timestep
        # Periodicity
        periodicity_handler(current_timestep)
        # Here, the next time step begins
        current_timestep = current_timestep.next()
        # LBM Step
        dh.run_kernel(lb_kernels[current_timestep.idx])
        # Field Swaps
        if not inplace:
            dh.swap(pdfs.name, pdfs_tmp.name)
        # Macroscopic Values
        dh.run_kernel(getter_kernels[current_timestep.idx])

    # Run the simulation
    init()
    for _ in range(100):
        one_step()
    # Evaluation
    if gpu:
        dh.to_cpu(velocity_field.name)
    u = dh.gather_array(velocity_field.name)
    # Equal to the steady-state velocity field up to numerical errors
    assert_allclose(u, u_ref)
    # Flow must be equal up to numerical error for all streaming patterns
    global all_results
    for key, prev_u in all_results.items():
        if key[0] == stencil:
            prev_pattern = key[1]
            assert_allclose(
                u, prev_u, err_msg=f'Velocity field for {streaming_pattern} differed from {prev_pattern}!')
    all_results[(stencil, streaming_pattern)] = u
import numpy as np
from dataclasses import replace
from pystencils.datahandling import create_data_handling
from pystencils import create_kernel, Target, CreateKernelConfig
from lbmpy.creationfunctions import create_lb_collision_rule, create_lb_function, LBMConfig, LBMOptimisation
from lbmpy.enums import ForceModel, Method, Stencil
from lbmpy.macroscopic_value_kernels import macroscopic_values_getter, macroscopic_values_setter
from lbmpy.stencils import LBStencil
from lbmpy.advanced_streaming import LBMPeriodicityHandling
from lbmpy.boundaries import NoSlip, LatticeBoltzmannBoundaryHandling
from lbmpy.advanced_streaming.utility import is_inplace, streaming_patterns, get_timesteps
import pytest
from numpy.testing import assert_allclose
# Velocity fields of previous parametrizations, keyed by (stencil, streaming_pattern);
# used across test runs to check that all streaming patterns give the same flow.
all_results = dict()
# GPU target is only tested when cupy is importable.
targets = [Target.CPU]
try:
    import cupy
    targets += [Target.GPU]
except Exception:
    # cupy is optional; without it only the CPU target is exercised.
    pass
class PeriodicPipeFlow:
    """Force-driven flow through a pipe that is periodic along its axis.

    Sets up data handling, collision/stream kernels, periodicity handling and a
    wall boundary (NoSlip by default) for a given stencil and streaming pattern.
    """

    def __init__(self, stencil, streaming_pattern, wall_boundary=None, target=Target.CPU):
        if wall_boundary is None:
            wall_boundary = NoSlip()
        self.target = target
        self.gpu = target in [Target.GPU]
        # Stencil
        self.stencil = stencil
        self.q = stencil.Q
        self.dim = stencil.D
        # Streaming
        self.streaming_pattern = streaming_pattern
        self.inplace = is_inplace(self.streaming_pattern)
        self.timesteps = get_timesteps(streaming_pattern)
        self.zeroth_timestep = self.timesteps[0]
        # Domain, Data Handling and PDF fields
        # Periodic only along the pipe axis (x); driven by a constant body force in x.
        self.pipe_length = 60
        self.pipe_radius = 15
        self.domain_size = (self.pipe_length, ) + (2 * self.pipe_radius,) * (self.dim - 1)
        self.periodicity = (True, ) + (False, ) * (self.dim - 1)
        self.force = (0.0001, ) + (0.0,) * (self.dim - 1)
        self.dh = create_data_handling(domain_size=self.domain_size,
                                       periodicity=self.periodicity, default_target=self.target)
        self.pdfs = self.dh.add_array('pdfs', self.q)
        if not self.inplace:
            self.pdfs_tmp = self.dh.add_array_like('pdfs_tmp', self.pdfs.name)
        # LBM Streaming and Collision
        lbm_config = LBMConfig(stencil=stencil, method=Method.SRT, relaxation_rate=1.0,
                               force_model=ForceModel.GUO, force=self.force, streaming_pattern=streaming_pattern)
        lbm_opt = LBMOptimisation(symbolic_field=self.pdfs)
        config = CreateKernelConfig(target=self.target)
        if not self.inplace:
            lbm_opt = replace(lbm_opt, symbolic_temporary_field=self.pdfs_tmp)
        self.lb_collision = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
        self.lb_method = self.lb_collision.method
        # One kernel per timestep parity (in-place patterns need even and odd variants).
        self.lb_kernels = []
        for t in self.timesteps:
            lbm_config = replace(lbm_config, timestep=t)
            self.lb_kernels.append(create_lb_function(collision_rule=self.lb_collision,
                                                      lbm_config=lbm_config,
                                                      lbm_optimisation=lbm_opt,
                                                      config=config))
        # Macroscopic Values
        self.density = 1.0
        self.density_field = self.dh.add_array('rho', 1)
        u_x = 0.0
        self.velocity = (u_x,) * self.dim
        self.velocity_field = self.dh.add_array('u', self.dim)
        setter = macroscopic_values_setter(
            self.lb_method, self.density, self.velocity, self.pdfs,
            streaming_pattern=self.streaming_pattern, previous_timestep=self.zeroth_timestep)
        self.init_kernel = create_kernel(setter,
                                         config=CreateKernelConfig(target=target, ghost_layers=1)).compile()
        self.getter_kernels = []
        for t in self.timesteps:
            getter = macroscopic_values_getter(
                self.lb_method, self.density_field, self.velocity_field, self.pdfs,
                streaming_pattern=self.streaming_pattern, previous_timestep=t)
            self.getter_kernels.append(create_kernel(getter,
                                                     config=CreateKernelConfig(target=target, ghost_layers=1)).compile())
        # Periodicity
        self.periodicity_handler = LBMPeriodicityHandling(
            self.stencil, self.dh, self.pdfs.name, streaming_pattern=self.streaming_pattern)
        # Boundary Handling
        self.wall = wall_boundary
        self.bh = LatticeBoltzmannBoundaryHandling(
            self.lb_method, self.dh, self.pdfs.name,
            streaming_pattern=self.streaming_pattern, target=self.target)
        self.bh.set_boundary(boundary_obj=self.wall, mask_callback=self.mask_callback)
        self.current_timestep = self.zeroth_timestep

    def mask_callback(self, x, y, z=None):
        """Marks all cells outside the circular pipe cross-section as wall."""
        y = y - self.pipe_radius
        z = z - self.pipe_radius if z is not None else 0
        return np.sqrt(y**2 + z**2) >= self.pipe_radius

    def init(self):
        """Resets the timestep state and initializes the pdf field."""
        self.current_timestep = self.zeroth_timestep
        self.dh.run_kernel(self.init_kernel)

    def step(self):
        """Advances the simulation by one LBM time step."""
        # Order matters! First communicate, then boundaries, otherwise
        # periodicity handling overwrites reflected populations
        # Periodicty
        self.periodicity_handler(self.current_timestep)
        # Boundaries
        self.bh(prev_timestep=self.current_timestep)
        # Here, the next time step begins
        self.current_timestep = self.current_timestep.next()
        # LBM Step
        self.dh.run_kernel(self.lb_kernels[self.current_timestep.idx])
        # Field Swaps
        if not self.inplace:
            self.dh.swap(self.pdfs.name, self.pdfs_tmp.name)
        # Macroscopic Values
        self.dh.run_kernel(self.getter_kernels[self.current_timestep.idx])

    def run(self, iterations):
        """Runs the given number of time steps."""
        for _ in range(iterations):
            self.step()

    @property
    def velocity_array(self):
        """Gathered velocity field (copied from GPU first, if necessary)."""
        if self.gpu:
            self.dh.to_cpu(self.velocity_field.name)
        return self.dh.gather_array(self.velocity_field.name)

    def get_trimmed_velocity_array(self):
        """Gathered velocity field with all wall cells set to NaN."""
        if self.gpu:
            self.dh.to_cpu(self.velocity_field.name)
        u = np.copy(self.dh.gather_array(self.velocity_field.name))
        mask = self.bh.get_mask(None, self.wall)
        for idx in np.ndindex(u.shape[:-1]):
            if mask[idx] != 0:
                u[idx] = np.full((self.dim, ), np.nan)
        return u
@pytest.mark.parametrize('stencil', [Stencil.D2Q9, Stencil.D3Q19, Stencil.D3Q27])
@pytest.mark.parametrize('streaming_pattern', streaming_patterns)
@pytest.mark.parametrize('target', targets)
@pytest.mark.longrun
def test_periodic_pipe(stencil, streaming_pattern, target):
    """Force-driven periodic pipe flow: all streaming patterns must yield the
    same velocity field for a given stencil."""
    stencil = LBStencil(stencil)
    pipeflow = PeriodicPipeFlow(stencil, streaming_pattern, target=target)
    pipeflow.init()
    pipeflow.run(100)
    # Wall cells are NaN in the trimmed array and thus excluded from comparison.
    u = pipeflow.get_trimmed_velocity_array()
    # Flow must be equal up to numerical error for all streaming patterns
    global all_results
    for key, prev_u in all_results.items():
        if key[0] == stencil:
            prev_pattern = key[1]
            assert_allclose(
                u, prev_u,
                rtol=1, atol=1e-16,
                err_msg=f'Velocity field for {streaming_pattern} differed from {prev_pattern}!')
    all_results[(stencil, streaming_pattern)] = u
{"indexes": {"simulationresult": {"pk": {"key": "pk", "id": "76f7f2660bd4475b90697716ef655a46"}}}, "store_class": "transactional", "index_class": "transactional", "index_store_class": "basic", "serializer_class": "json", "autocommit": false, "version": "0.2.12"}
\ No newline at end of file
from statistics import median

import numpy as np
import pytest
import sympy as sp

from lbmpy.scenarios import create_lid_driven_cavity
from lbmpy._compat import IS_PYSTENCILS_2

# The benchmark relies on pystencils 1.x internals; skip the whole module on
# pystencils 2.0. The pystencils imports must stay inside the guard, because
# importing them unconditionally would raise before the skip can take effect.
if IS_PYSTENCILS_2:
    pytest.skip(reason="Benchmark test case needs to be rebuilt for pystencils 2.0", allow_module_level=True)
else:
    from pystencils.cpu.cpujit import add_or_change_compiler_flags
    from pystencils.runhelper import ParameterStudy
......@@ -228,11 +234,11 @@ def study_block_sizes_trt(study):
study.add_run(params, weight=ds[0] // domain_sizes[0][0])
@pytest.mark.longrun
def test_run():
    """Called by test suite - ensures that benchmark can be run"""
    # Small, odd-sized 2D and 3D domains keep this smoke test cheap while still
    # exercising the parameter study machinery end to end.
    s = ParameterStudy(run)
    study_optimization_options(s, domain_sizes=((17, 23), (19, 17, 18))).run()
# @pytest.mark.longrun
# def test_run():
# """Called by test suite - ensures that benchmark can be run"""
# s = ParameterStudy(run)
# study_optimization_options(s, domain_sizes=((17, 23), (19, 17, 18))).run()
def study_compiler_flags(study):
......