Compare revisions
Commits on Source (23)
Showing 574 additions and 209 deletions
......@@ -12,6 +12,12 @@ stages:
rules:
- if: $CI_PIPELINE_SOURCE != "schedule"
# Configuration for jobs meant to run on each commit to pycodegen/lbmpy/master
.every-commit-master:
rules:
- if: '$CI_PIPELINE_SOURCE != "schedule" && $CI_PROJECT_PATH == "pycodegen/lbmpy" && $CI_COMMIT_BRANCH == "master"'
# Base configuration for jobs meant to run at a schedule
.scheduled:
rules:
......@@ -264,6 +270,7 @@ build-documentation:
pages:
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full
extends: .every-commit-master
stage: deploy
needs: ["tests-and-coverage", "build-documentation"]
script:
......@@ -275,5 +282,3 @@ pages:
- public
tags:
- docker
only:
- master@pycodegen/lbmpy
......@@ -107,18 +107,25 @@ class IPyNbTest(pytest.Item):
# disable matplotlib output
exec("import matplotlib.pyplot as p; "
"p.close('all'); "
"p.switch_backend('Template')", global_dict)
# in notebooks there is an implicit plt.show() - if this is not called a warning is shown when the next
# plot is created. This warning is suppressed here
# Also animations cannot be shown, which also leads to a warning.
exec("import warnings;"
"warnings.filterwarnings('ignore', 'Adding an axes using the same arguments as a previous.*');",
"warnings.filterwarnings('ignore', 'Adding an axes using the same arguments as a previous.*');"
"warnings.filterwarnings('ignore', 'Animation was deleted without rendering anything.*');",
global_dict)
with tempfile.NamedTemporaryFile() as f:
f.write(self.code.encode())
f.flush()
runpy.run_path(f.name, init_globals=global_dict, run_name=self.name)
# Close any open figures
exec("import matplotlib.pyplot as p; "
"p.close('all')", global_dict)
class IPyNbFile(pytest.File):
def collect(self):
......@@ -141,10 +148,19 @@ class IPyNbFile(pytest.File):
pass
def pytest_collect_file(path, parent):
glob_exprs = ["*demo*.ipynb", "*tutorial*.ipynb", "test_*.ipynb"]
if any(path.fnmatch(g) for g in glob_exprs):
if pytest_version >= 50403:
return IPyNbFile.from_parent(fspath=path, parent=parent)
else:
return IPyNbFile(path, parent)
if pytest_version >= 70000:
# Since pytest 7.0, usage of `py.path.local` is deprecated and `pathlib.Path` should be used instead
import pathlib
def pytest_collect_file(file_path: pathlib.Path, parent):
glob_exprs = ["*demo*.ipynb", "*tutorial*.ipynb", "test_*.ipynb"]
if any(file_path.match(g) for g in glob_exprs):
return IPyNbFile.from_parent(path=file_path, parent=parent)
else:
def pytest_collect_file(path, parent):
glob_exprs = ["*demo*.ipynb", "*tutorial*.ipynb", "test_*.ipynb"]
if any(path.fnmatch(g) for g in glob_exprs):
if pytest_version >= 50403:
return IPyNbFile.from_parent(fspath=path, parent=parent)
else:
return IPyNbFile(path, parent)
......@@ -12,6 +12,7 @@ from .lbstep import LatticeBoltzmannStep
from .macroscopic_value_kernels import (
pdf_initialization_assignments,
macroscopic_values_getter,
strain_rate_tensor_getter,
compile_macroscopic_values_getter,
compile_macroscopic_values_setter,
create_advanced_velocity_setter_collision_rule,
......@@ -42,6 +43,7 @@ __all__ = [
"LatticeBoltzmannStep",
"pdf_initialization_assignments",
"macroscopic_values_getter",
"strain_rate_tensor_getter",
"compile_macroscopic_values_getter",
"compile_macroscopic_values_setter",
"create_advanced_velocity_setter_collision_rule",
......
import itertools
from pystencils import CreateKernelConfig, Field, Assignment, AssignmentCollection
from pystencils.slicing import shift_slice, get_slice_before_ghost_layer, normalize_slice
from lbmpy.advanced_streaming.utility import is_inplace, get_accessor, numeric_index, \
Timestep, get_timesteps, numeric_offsets
from pystencils.slicing import (
shift_slice,
get_slice_before_ghost_layer,
normalize_slice,
)
from lbmpy.advanced_streaming.utility import (
is_inplace,
get_accessor,
numeric_index,
Timestep,
get_timesteps,
numeric_offsets,
)
from pystencils.datahandling import SerialDataHandling
from pystencils.enums import Target
from itertools import chain
......@@ -10,22 +20,28 @@ from itertools import chain
class LBMPeriodicityHandling:
def __init__(self, stencil, data_handling, pdf_field_name,
streaming_pattern='pull', ghost_layers=1,
cupy_direct_copy=True):
def __init__(
self,
stencil,
data_handling,
pdf_field_name,
streaming_pattern="pull",
ghost_layers=1,
cupy_direct_copy=True,
):
"""
Periodicity Handling for Lattice Boltzmann Streaming.
**On the usage with cuda:**
- cupy allows the copying of sliced arrays within device memory using the numpy syntax,
e.g. `dst[:,0] = src[:,-1]`. In this implementation, this is the default for periodicity
handling. Alternatively, if you set `cupy_direct_copy=False`, GPU kernels are generated and
compiled. The compiled kernels are almost twice as fast in execution as cupy array copying,
but especially for large stencils like D3Q27, their compilation can take up to 20 seconds.
Choose your weapon depending on your use case.
Periodicity Handling for Lattice Boltzmann Streaming.
**On the usage with cuda:**
- cupy allows the copying of sliced arrays within device memory using the numpy syntax,
e.g. `dst[:,0] = src[:,-1]`. In this implementation, this is the default for periodicity
handling. Alternatively, if you set `cupy_direct_copy=False`, GPU kernels are generated and
compiled. The compiled kernels are almost twice as fast in execution as cupy array copying,
but especially for large stencils like D3Q27, their compilation can take up to 20 seconds.
Choose your weapon depending on your use case.
"""
if not isinstance(data_handling, SerialDataHandling):
raise ValueError('Only serial data handling is supported!')
raise ValueError("Only serial data handling is supported!")
self.stencil = stencil
self.dim = stencil.D
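For context, a minimal usage sketch of `LBMPeriodicityHandling` with generated GPU copy kernels (`cupy_direct_copy=False`), mirroring the GPU test added further down in this diff; the domain size and field name are illustrative, and cupy is required:

```python
import pystencils as ps
from lbmpy.stencils import LBStencil
from lbmpy.enums import Stencil
from lbmpy.advanced_streaming.communication import LBMPeriodicityHandling
from lbmpy.advanced_streaming.utility import Timestep

stencil = LBStencil(Stencil.D3Q19)
dh = ps.create_data_handling((32,) * stencil.D, periodicity=(True,) * stencil.D,
                             parallel=False, default_target=ps.Target.GPU)
pdfs = dh.add_array("pdfs", values_per_cell=stencil.Q)

# cupy_direct_copy=False trades a one-off kernel compilation cost for faster execution
periodicity = LBMPeriodicityHandling(stencil, dh, pdfs.name,
                                     streaming_pattern="pull", cupy_direct_copy=False)
periodicity(Timestep.EVEN)  # apply the periodic copies for the given timestep
```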
......@@ -56,12 +72,16 @@ class LBMPeriodicityHandling:
self.comm_slices = []
timesteps = get_timesteps(streaming_pattern)
for timestep in timesteps:
slices_per_comm_dir = get_communication_slices(stencil=stencil,
comm_stencil=copy_directions,
streaming_pattern=streaming_pattern,
prev_timestep=timestep,
ghost_layers=ghost_layers)
self.comm_slices.append(list(chain.from_iterable(v for k, v in slices_per_comm_dir.items())))
slices_per_comm_dir = get_communication_slices(
stencil=stencil,
comm_stencil=copy_directions,
streaming_pattern=streaming_pattern,
prev_timestep=timestep,
ghost_layers=ghost_layers,
)
self.comm_slices.append(
list(chain.from_iterable(v for k, v in slices_per_comm_dir.items()))
)
if self.target == Target.GPU and not cupy_direct_copy:
self.device_copy_kernels = list()
......@@ -81,11 +101,11 @@ class LBMPeriodicityHandling:
arr[dst] = arr[src]
def _compile_copy_kernels(self, timestep):
assert self.target == Target.GPU
pdf_field = self.dh.fields[self.pdf_field_name]
kernels = []
for src, dst in self.comm_slices[timestep.idx]:
kernels.append(
periodic_pdf_copy_kernel(pdf_field, src, dst, target=self.target))
kernels.append(periodic_pdf_gpu_copy_kernel(pdf_field, src, dst))
return kernels
def _periodicity_handling_gpu(self, prev_timestep):
......@@ -100,7 +120,12 @@ class LBMPeriodicityHandling:
def get_communication_slices(
stencil, comm_stencil=None, streaming_pattern='pull', prev_timestep=Timestep.BOTH, ghost_layers=1):
stencil,
comm_stencil=None,
streaming_pattern="pull",
prev_timestep=Timestep.BOTH,
ghost_layers=1,
):
"""
Return the source and destination slices for periodicity handling or communication between blocks.
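As a brief illustration (not part of the diff) of how the returned slice pairs are consumed, mirroring the `arr[dst] = arr[src]` copy above and the tests below; the array shape is illustrative:

```python
import numpy as np
from lbmpy.stencils import LBStencil
from lbmpy.enums import Stencil
from lbmpy.advanced_streaming.communication import get_communication_slices
from lbmpy.advanced_streaming.utility import Timestep

stencil = LBStencil(Stencil.D2Q9)
arr = np.zeros((4,) * stencil.D + (stencil.Q,))  # domain including one ghost layer per side
slices = get_communication_slices(stencil, streaming_pattern="pull",
                                  prev_timestep=Timestep.BOTH, ghost_layers=1)
for _, slice_pairs in slices.items():
    for src, dst in slice_pairs:
        arr[dst] = arr[src]  # periodic copy from the inner layer into the ghost layer
```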
......@@ -116,7 +141,9 @@ def get_communication_slices(
if comm_stencil is None:
comm_stencil = itertools.product(*([-1, 0, 1] for _ in range(stencil.D)))
pdfs = Field.create_generic('pdfs', spatial_dimensions=len(stencil[0]), index_shape=(stencil.Q,))
pdfs = Field.create_generic(
"pdfs", spatial_dimensions=len(stencil[0]), index_shape=(stencil.Q,)
)
write_accesses = get_accessor(streaming_pattern, prev_timestep).write(pdfs, stencil)
slices_per_comm_direction = dict()
......@@ -130,7 +157,9 @@ def get_communication_slices(
d = stencil.index(streaming_dir)
write_index = numeric_index(write_accesses[d])[0]
origin_slice = get_slice_before_ghost_layer(comm_dir, ghost_layers=ghost_layers, thickness=1)
origin_slice = get_slice_before_ghost_layer(
comm_dir, ghost_layers=ghost_layers, thickness=1
)
src_slice = _fix_length_one_slices(origin_slice)
write_offsets = numeric_offsets(write_accesses[d])
......@@ -138,13 +167,15 @@ def get_communication_slices(
# TODO: this is just a hotfix. _trim_slice_in_direction breaks FreeSlip BC with adjacent periodic side
if streaming_pattern != "pull":
src_slice = shift_slice(_trim_slice_in_direction(src_slice, tangential_dir), write_offsets)
src_slice = shift_slice(
_trim_slice_in_direction(src_slice, tangential_dir), write_offsets
)
neighbour_transform = _get_neighbour_transform(comm_dir, ghost_layers)
dst_slice = shift_slice(src_slice, neighbour_transform)
src_slice = src_slice + (write_index, )
dst_slice = dst_slice + (write_index, )
src_slice = src_slice + (write_index,)
dst_slice = dst_slice + (write_index,)
slices_for_dir.append((src_slice, dst_slice))
......@@ -152,10 +183,10 @@ def get_communication_slices(
return slices_per_comm_direction
def periodic_pdf_copy_kernel(pdf_field, src_slice, dst_slice,
domain_size=None, target=Target.GPU):
"""Copies a rectangular array slice onto another non-overlapping array slice"""
from pystencils.gpucuda.kernelcreation import create_cuda_kernel
def periodic_pdf_gpu_copy_kernel(pdf_field, src_slice, dst_slice, domain_size=None):
"""Generate a GPU kernel which copies all values from one slice of a field
to another non-overlapping slice."""
from pystencils import create_kernel
pdf_idx = src_slice[-1]
assert isinstance(pdf_idx, int), "PDF index needs to be an integer constant"
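A hedged sketch of compiling and running one of these copy kernels directly, following the GPU communication test added later in this diff; the direction, field name and copied index are illustrative, and cupy is required:

```python
import pystencils as ps
from pystencils.slicing import get_slice_before_ghost_layer, get_ghost_region_slice
from lbmpy.stencils import LBStencil
from lbmpy.enums import Stencil
from lbmpy.advanced_streaming.communication import periodic_pdf_gpu_copy_kernel

stencil = LBStencil(Stencil.D3Q27)
dh = ps.create_data_handling((4,) * stencil.D, periodicity=(True,) * stencil.D,
                             parallel=False, default_target=ps.Target.GPU)
field = dh.add_array("field", values_per_cell=2)

direction = (1, 0, 0)
src_slice = get_slice_before_ghost_layer(direction) + (1,)              # copy only index 1
dst_slice = get_ghost_region_slice(tuple(-e for e in direction)) + (1,)

kernel = periodic_pdf_gpu_copy_kernel(field, src_slice, dst_slice)
dh.all_to_gpu()
dh.run_kernel(kernel)
dh.all_to_cpu()
```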
......@@ -176,18 +207,28 @@ def periodic_pdf_copy_kernel(pdf_field, src_slice, dst_slice,
def _stop(s):
return s.stop if isinstance(s, slice) else s
offset = [_start(s1) - _start(s2) for s1, s2 in zip(normalized_from_slice, normalized_to_slice)]
assert offset == [_stop(s1) - _stop(s2) for s1, s2 in zip(normalized_from_slice, normalized_to_slice)], \
"Slices have to have same size"
copy_eq = AssignmentCollection(main_assignments=[Assignment(pdf_field(pdf_idx), pdf_field[tuple(offset)](pdf_idx))])
config = CreateKernelConfig(iteration_slice=dst_slice, skip_independence_check=True)
ast = create_cuda_kernel(copy_eq, config=config)
if target == Target.GPU:
from pystencils.gpucuda import make_python_function
return make_python_function(ast)
else:
raise ValueError('Invalid target:', target)
offset = [
_start(s1) - _start(s2)
for s1, s2 in zip(normalized_from_slice, normalized_to_slice)
]
assert offset == [
_stop(s1) - _stop(s2)
for s1, s2 in zip(normalized_from_slice, normalized_to_slice)
], "Slices have to have same size"
copy_eq = AssignmentCollection(
main_assignments=[
Assignment(pdf_field(pdf_idx), pdf_field[tuple(offset)](pdf_idx))
]
)
config = CreateKernelConfig(
iteration_slice=dst_slice,
skip_independence_check=True,
target=Target.GPU,
)
ast = create_kernel(copy_eq, config=config)
return ast.compile()
def _extend_dir(direction):
......@@ -196,10 +237,10 @@ def _extend_dir(direction):
elif direction[0] == 0:
for d in [-1, 0, 1]:
for rest in _extend_dir(direction[1:]):
yield (d, ) + rest
yield (d,) + rest
else:
for rest in _extend_dir(direction[1:]):
yield (direction[0], ) + rest
yield (direction[0],) + rest
def _get_neighbour_transform(direction, ghost_layers):
......
......@@ -67,7 +67,7 @@ def boundary_conditional(boundary, direction, streaming_pattern, prev_timestep,
assignments = []
for direction_idx in dir_indices:
rule = boundary(f_out, f_in, direction_idx, inv_dir, lb_method, index_field=None)
rule = boundary(f_out, f_in, direction_idx, inv_dir, lb_method, index_field=None, force_vector=None)
# rhs: replace f_out by post collision symbols.
rhs_substitutions = {f_out(i): sym for i, sym in enumerate(lb_method.post_collision_pdf_symbols)}
......
......@@ -28,10 +28,11 @@ class LbBoundary(abc.ABC):
inner_or_boundary = True
single_link = False
def __init__(self, name=None):
def __init__(self, name=None, calculate_force_on_boundary=False):
self._name = name
self.calculate_force_on_boundary = calculate_force_on_boundary
def __call__(self, f_out, f_in, dir_symbol, inv_dir, lb_method, index_field):
def __call__(self, f_out, f_in, dir_symbol, inv_dir, lb_method, index_field, force_vector):
"""
This function defines the boundary behavior and must therefore be implemented by all boundaries.
The boundary is defined through a list of sympy equations from which a boundary kernel is generated.
......@@ -48,6 +49,8 @@ class LbBoundary(abc.ABC):
lb_method: an instance of the LB method used. Use this to adapt the boundary to the method
(e.g. compressibility)
index_field: the boundary index field that can be used to retrieve and update boundary data
force_vector: vector to store the force on the boundary. Has the same size as the index field and
D entries per cell
Returns:
list of pystencils assignments, or pystencils.AssignmentCollection
......@@ -109,14 +112,31 @@ class NoSlip(LbBoundary):
Args:
name: optional name of the boundary.
calculate_force_on_boundary: stores the force for each PDF at the boundary in a force vector
"""
def __init__(self, name=None):
def __init__(self, name=None, calculate_force_on_boundary=False):
"""Set an optional name here, to mark boundaries, for example for force evaluations"""
super(NoSlip, self).__init__(name)
super(NoSlip, self).__init__(name, calculate_force_on_boundary)
def __call__(self, f_out, f_in, dir_symbol, inv_dir, lb_method, index_field):
return Assignment(f_in(inv_dir[dir_symbol]), f_out(dir_symbol))
def get_additional_code_nodes(self, lb_method):
if self.calculate_force_on_boundary:
return [NeighbourOffsetArrays(lb_method.stencil)]
else:
return []
def __call__(self, f_out, f_in, dir_symbol, inv_dir, lb_method, index_field, force_vector):
if self.calculate_force_on_boundary:
force = sp.Symbol("f")
subexpressions = [Assignment(force, sp.Float(2.0) * f_out(dir_symbol))]
offset = NeighbourOffsetArrays.neighbour_offset(dir_symbol, lb_method.stencil)
for i in range(lb_method.stencil.D):
subexpressions.append(Assignment(force_vector[0](f'F_{i}'), force * offset[i]))
else:
subexpressions = []
boundary_assignments = [Assignment(f_in(inv_dir[dir_symbol]), f_out(dir_symbol))]
return AssignmentCollection(boundary_assignments, subexpressions=subexpressions)
class NoSlipLinearBouzidi(LbBoundary):
......@@ -135,11 +155,10 @@ class NoSlipLinearBouzidi(LbBoundary):
data_type: data type of the wall distance q
"""
def __init__(self, name=None, init_wall_distance=None, data_type='double'):
def __init__(self, name=None, init_wall_distance=None, data_type='double', calculate_force_on_boundary=False):
self.data_type = data_type
self.init_wall_distance = init_wall_distance
super(NoSlipLinearBouzidi, self).__init__(name)
super(NoSlipLinearBouzidi, self).__init__(name, calculate_force_on_boundary)
@property
def additional_data(self):
......@@ -147,6 +166,12 @@ class NoSlipLinearBouzidi(LbBoundary):
direction is needed. This information is stored in the index vector."""
return [('q', create_type(self.data_type))]
def get_additional_code_nodes(self, lb_method):
if self.calculate_force_on_boundary:
return [NeighbourOffsetArrays(lb_method.stencil)]
else:
return []
@property
def additional_data_init_callback(self):
def default_callback(boundary_data, **_):
......@@ -160,7 +185,7 @@ class NoSlipLinearBouzidi(LbBoundary):
"(init_wall_distance=None). The boundary condition will fall back to a simple NoSlip BC")
return default_callback
def __call__(self, f_out, f_in, dir_symbol, inv_dir, lb_method, index_field):
def __call__(self, f_out, f_in, dir_symbol, inv_dir, lb_method, index_field, force_vector):
f_xf = sp.Symbol("f_xf")
f_xf_inv = sp.Symbol("f_xf_inv")
d_x2f = sp.Symbol("d_x2f")
......@@ -182,6 +207,13 @@ class NoSlipLinearBouzidi(LbBoundary):
(case_two, sp.And(sp.Gt(q, 0), sp.Lt(q, 0.5))),
(case_three, True))
if self.calculate_force_on_boundary:
force = sp.Symbol("f")
subexpressions.append(Assignment(force, f_xf + rhs))
offset = NeighbourOffsetArrays.neighbour_offset(dir_symbol, lb_method.stencil)
for i in range(lb_method.stencil.D):
subexpressions.append(Assignment(force_vector[0](f'F_{i}'), force * offset[i]))
boundary_assignments = [Assignment(f_in(inv_dir[dir_symbol]), rhs)]
return AssignmentCollection(boundary_assignments, subexpressions=subexpressions)
......@@ -201,14 +233,15 @@ class QuadraticBounceBack(LbBoundary):
data_type: data type of the wall distance q
"""
def __init__(self, relaxation_rate, name=None, init_wall_distance=None, data_type='double'):
def __init__(self, relaxation_rate, name=None, init_wall_distance=None, data_type='double',
calculate_force_on_boundary=False):
self.relaxation_rate = relaxation_rate
self.data_type = data_type
self.init_wall_distance = init_wall_distance
self.equilibrium_values_name = "f_eq"
self.inv_dir_symbol = TypedSymbol("inv_dir", create_type("int32"))
super(QuadraticBounceBack, self).__init__(name)
super(QuadraticBounceBack, self).__init__(name, calculate_force_on_boundary)
@property
def additional_data(self):
......@@ -258,7 +291,7 @@ class QuadraticBounceBack(LbBoundary):
return result
def __call__(self, f_out, f_in, dir_symbol, inv_dir, lb_method, index_field):
def __call__(self, f_out, f_in, dir_symbol, inv_dir, lb_method, index_field, force_vector):
omega = self.relaxation_rate
inv = sp.IndexedBase(self.inv_dir_symbol, shape=(1,))[dir_symbol]
weight_info = LbmWeightInfo(lb_method, data_type=self.data_type)
......@@ -311,6 +344,13 @@ class QuadraticBounceBack(LbBoundary):
t2 = (q * (f_xf + f_xf_inv)) / (one + q)
result = (one - q) / (one + q) * t1 * half + t2
if self.calculate_force_on_boundary:
force = sp.Symbol("f")
subexpressions.append(Assignment(force, f_xf + result))
offset = NeighbourOffsetArrays.neighbour_offset(dir_symbol, lb_method.stencil)
for i in range(lb_method.stencil.D):
subexpressions.append(Assignment(force_vector[0](f'F_{i}'), force * offset[i]))
boundary_assignments = [Assignment(f_in(inv_dir[dir_symbol]), result)]
return AssignmentCollection(boundary_assignments, subexpressions=subexpressions)
......@@ -355,7 +395,7 @@ class FreeSlip(LbBoundary):
if name is None and normal_direction:
name = f"Free Slip : {offset_to_direction_string([-x for x in normal_direction])}"
super(FreeSlip, self).__init__(name)
super(FreeSlip, self).__init__(name, calculate_force_on_boundary=False)
def init_callback(self, boundary_data, **_):
if len(boundary_data.index_array) > 1e6:
......@@ -425,7 +465,7 @@ class FreeSlip(LbBoundary):
else:
return [NeighbourOffsetArrays(lb_method.stencil)]
def __call__(self, f_out, f_in, dir_symbol, inv_dir, lb_method, index_field):
def __call__(self, f_out, f_in, dir_symbol, inv_dir, lb_method, index_field, force_vector):
neighbor_offset = NeighbourOffsetArrays.neighbour_offset(dir_symbol, lb_method.stencil)
if self.normal_direction:
tangential_offset = tuple(offset + normal for offset, normal in zip(neighbor_offset, self.normal_direction))
......@@ -559,13 +599,13 @@ class WallFunctionBounce(LbBoundary):
if name is None:
name = f"WFB : {offset_to_direction_string([-x for x in normal_direction])}"
super(WallFunctionBounce, self).__init__(name)
super(WallFunctionBounce, self).__init__(name, calculate_force_on_boundary=False)
def get_additional_code_nodes(self, lb_method):
return [MirroredStencilDirections(self.stencil, self.mirror_axis),
NeighbourOffsetArrays(lb_method.stencil)]
def __call__(self, f_out, f_in, dir_symbol, inv_dir, lb_method, index_field):
def __call__(self, f_out, f_in, dir_symbol, inv_dir, lb_method, index_field, force_vector):
# needed symbols for offsets and indices
# neighbour offset symbols are basically the stencil directions defined in stencils.py:L130ff.
neighbor_offset = NeighbourOffsetArrays.neighbour_offset(dir_symbol, lb_method.stencil)
......@@ -715,7 +755,7 @@ class UBB(LbBoundary):
self.dim = dim
self.data_type = data_type
super(UBB, self).__init__(name)
super(UBB, self).__init__(name, calculate_force_on_boundary=False)
@property
def additional_data(self):
......@@ -751,7 +791,7 @@ class UBB(LbBoundary):
This is useful if the inflow velocity should have a certain profile for instance"""
return callable(self._velocity)
def __call__(self, f_out, f_in, dir_symbol, inv_dir, lb_method, index_field):
def __call__(self, f_out, f_in, dir_symbol, inv_dir, lb_method, index_field, force_vector):
vel_from_idx_field = callable(self._velocity)
vel = [index_field(f'vel_{i}') for i in range(self.dim)] if vel_from_idx_field else self._velocity
......@@ -827,7 +867,7 @@ class SimpleExtrapolationOutflow(LbBoundary):
self.normal_direction = tuple([int(n) for n in normal_direction])
assert all([n in [-1, 0, 1] for n in self.normal_direction]), \
"Only -1, 0 and 1 allowed for defining the normal direction"
super(SimpleExtrapolationOutflow, self).__init__(name)
super(SimpleExtrapolationOutflow, self).__init__(name, calculate_force_on_boundary=False)
def get_additional_code_nodes(self, lb_method):
"""Return a list of code nodes that will be added in the generated code before the index field loop.
......@@ -841,7 +881,7 @@ class SimpleExtrapolationOutflow(LbBoundary):
"""
return [NeighbourOffsetArrays(lb_method.stencil)]
def __call__(self, f_out, f_in, dir_symbol, inv_dir, lb_method, index_field):
def __call__(self, f_out, f_in, dir_symbol, inv_dir, lb_method, index_field, force_vector):
neighbor_offset = NeighbourOffsetArrays.neighbour_offset(dir_symbol, lb_method.stencil)
tangential_offset = tuple(offset - normal for offset, normal in zip(neighbor_offset, self.normal_direction))
......@@ -867,7 +907,7 @@ class ExtrapolationOutflow(LbBoundary):
Args:
normal_direction: direction vector normal to the outflow
lb_method: the lattice boltzman method to be used in the simulation
lb_method: the lattice Boltzmann method to be used in the simulation
dt: lattice time step size
dx: lattice spacing distance
name: optional name of the boundary.
......@@ -920,7 +960,7 @@ class ExtrapolationOutflow(LbBoundary):
self.equilibrium_calculation = calc_eq_pdfs
super(ExtrapolationOutflow, self).__init__(name)
super(ExtrapolationOutflow, self).__init__(name, calculate_force_on_boundary=False)
def init_callback(self, boundary_data, **_):
dim = boundary_data.dim
......@@ -973,7 +1013,7 @@ class ExtrapolationOutflow(LbBoundary):
"""
return [NeighbourOffsetArrays(lb_method.stencil)]
def __call__(self, f_out, f_in, dir_symbol, inv_dir, lb_method, index_field):
def __call__(self, f_out, f_in, dir_symbol, inv_dir, lb_method, index_field, force_vector):
subexpressions = []
boundary_assignments = []
dtdx = sp.Rational(self.dt, self.dx)
......@@ -1019,9 +1059,9 @@ class FixedDensity(LbBoundary):
name = "Fixed Density " + str(density)
self.density = density
super(FixedDensity, self).__init__(name)
super(FixedDensity, self).__init__(name, calculate_force_on_boundary=False)
def __call__(self, f_out, f_in, dir_symbol, inv_dir, lb_method, index_field):
def __call__(self, f_out, f_in, dir_symbol, inv_dir, lb_method, index_field, force_vector):
def remove_asymmetric_part_of_main_assignments(assignment_collection, degrees_of_freedom):
new_main_assignments = [Assignment(a.lhs, get_symmetric_part(a.rhs, degrees_of_freedom))
for a in assignment_collection.main_assignments]
......@@ -1083,7 +1123,7 @@ class DiffusionDirichlet(LbBoundary):
self.concentration_is_callable = callable(self.concentration)
self.velocity_field = velocity_field
super(DiffusionDirichlet, self).__init__(name)
super(DiffusionDirichlet, self).__init__(name, calculate_force_on_boundary=False)
@property
def additional_data(self):
......@@ -1116,7 +1156,7 @@ class DiffusionDirichlet(LbBoundary):
else:
return [LbmWeightInfo(lb_method, self._data_type)]
def __call__(self, f_out, f_in, dir_symbol, inv_dir, lb_method, index_field):
def __call__(self, f_out, f_in, dir_symbol, inv_dir, lb_method, index_field, force_vector):
assert lb_method.conserved_quantity_computation.zero_centered_pdfs is False, \
"DiffusionDirichlet only works for methods with normal pdfs storage -> set zero_centered=False"
weight_info = LbmWeightInfo(lb_method, self._data_type)
......@@ -1159,7 +1199,7 @@ class NeumannByCopy(LbBoundary):
"""
return [NeighbourOffsetArrays(lb_method.stencil)]
def __call__(self, f_out, f_in, dir_symbol, inv_dir, lb_method, index_field):
def __call__(self, f_out, f_in, dir_symbol, inv_dir, lb_method, index_field, force_vector):
neighbour_offset = NeighbourOffsetArrays.neighbour_offset(dir_symbol, lb_method.stencil)
return [Assignment(f_in(inv_dir[dir_symbol]), f_out(inv_dir[dir_symbol])),
Assignment(f_out[neighbour_offset](dir_symbol), f_out(dir_symbol))]
......@@ -1178,7 +1218,7 @@ class StreamInConstant(LbBoundary):
"""
def __init__(self, constant, name=None):
super(StreamInConstant, self).__init__(name)
super(StreamInConstant, self).__init__(name, calculate_force_on_boundary=False)
self.constant = constant
def get_additional_code_nodes(self, lb_method):
......@@ -1192,7 +1232,7 @@ class StreamInConstant(LbBoundary):
"""
return [NeighbourOffsetArrays(lb_method.stencil)]
def __call__(self, f_out, f_in, dir_symbol, inv_dir, lb_method, index_field):
def __call__(self, f_out, f_in, dir_symbol, inv_dir, lb_method, index_field, force_vector):
neighbour_offset = NeighbourOffsetArrays.neighbour_offset(dir_symbol, lb_method.stencil)
return [Assignment(f_in(inv_dir[dir_symbol]), self.constant),
Assignment(f_out[neighbour_offset](dir_symbol), self.constant)]
......
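A short usage sketch (not part of the diff) for the new `calculate_force_on_boundary` flag introduced above, following the boundary test added at the end of this diff; the names and the relaxation rate are illustrative:

```python
from lbmpy.boundaries import NoSlip, NoSlipLinearBouzidi, QuadraticBounceBack

wall = NoSlip(name="wall", calculate_force_on_boundary=True)
bouzidi_wall = NoSlipLinearBouzidi(name="bouzidi_wall", calculate_force_on_boundary=True)
curved_wall = QuadraticBounceBack(relaxation_rate=1.8, name="curved_wall",
                                  calculate_force_on_boundary=True)
# With the flag enabled, the generated boundary kernel additionally writes the per-link
# force contribution into the F_0 .. F_{D-1} entries of the force_vector index field.
```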
from dataclasses import replace
import numpy as np
from pystencils import Assignment, CreateKernelConfig, create_kernel, Field, Target
from pystencils.boundaries import BoundaryHandling
from pystencils.boundaries.createindexlist import numpy_data_type_for_boundary_object
from pystencils.field import FieldType
from pystencils.simp import add_subexpressions_for_field_reads
from pystencils.stencil import inverse_direction
......@@ -156,22 +158,31 @@ class LatticeBoltzmannBoundaryHandling(BoundaryHandling):
def create_lattice_boltzmann_boundary_kernel(pdf_field, index_field, lb_method, boundary_functor,
prev_timestep=Timestep.BOTH, streaming_pattern='pull',
target=Target.CPU, **kernel_creation_args):
target=Target.CPU, force_vector=None, **kernel_creation_args):
indexing = BetweenTimestepsIndexing(
pdf_field, lb_method.stencil, prev_timestep, streaming_pattern, np.int32, np.int32)
dim = lb_method.stencil.D
f_out, f_in = indexing.proxy_fields
dir_symbol = indexing.dir_symbol
inv_dir = indexing.inverse_dir_symbol
boundary_assignments = boundary_functor(f_out, f_in, dir_symbol, inv_dir, lb_method, index_field)
boundary_assignments = indexing.substitute_proxies(boundary_assignments)
config = CreateKernelConfig(index_fields=[index_field], target=target, default_number_int="int32",
config = CreateKernelConfig(target=target, default_number_int="int32",
skip_independence_check=True, **kernel_creation_args)
default_data_type = config.data_type.default_factory()
if force_vector is None:
force_vector_type = np.dtype([(f"F_{i}", default_data_type.c_name) for i in range(dim)], align=True)
force_vector = Field.create_generic('force_vector', spatial_dimensions=1,
dtype=force_vector_type, field_type=FieldType.INDEXED)
config = replace(config, index_fields=[index_field, force_vector])
boundary_assignments = boundary_functor(f_out, f_in, dir_symbol, inv_dir, lb_method, index_field, force_vector)
boundary_assignments = indexing.substitute_proxies(boundary_assignments)
if pdf_field.dtype != default_data_type:
boundary_assignments = add_subexpressions_for_field_reads(boundary_assignments, data_type=default_data_type)
......
......@@ -4,17 +4,18 @@ import pystencils as ps
from pystencils.field import Field
def welford_assignments(field, mean_field, sum_of_products_field=None):
def welford_assignments(field, mean_field, sum_of_squares_field=None, sum_of_cubes_field=None):
r"""
Creates the assignments for the kernel creation of Welford's algorithm
(https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm).
This algorithm is an online algorithm for the mean and variance calculation of sample data, here implemented for
the execution on vector fields, e.g., velocity.
The calculation of the variance is optional and only performed if a field for the sum of products is given.
the execution on scalar or vector fields, e.g., velocity.
The calculation of the variance / the third-order central moments is optional and only performed if a field for
the sum of squares / sum of cubes is given.
The mean value is directly updated in the mean vector field.
The variance must be retrieved in a post-processing step. Let :math `M_{2,n}` denote the value of the sum of
products after the first :math `n` samples. According to Welford the biased sample variance
The variance / covariance must be retrieved in a post-processing step. Let :math `M_{2,n}` denote the value of the
sum of squares after the first :math `n` samples. According to Welford the biased sample variance
:math `\sigma_n^2` and the unbiased sample variance :math `s_n^2` are given by
.. math ::
......@@ -22,6 +23,9 @@ def welford_assignments(field, mean_field, sum_of_products_field=None):
s_n^2 = \frac{M_{2,n}}{n-1},
respectively.
Likewise, to obtain the 3rd-order central moment(s) of the (vector) field, the sum of cubes field must be divided
by :math `n` in a post-processing step.
"""
if isinstance(field, Field):
......@@ -40,22 +44,41 @@ def welford_assignments(field, mean_field, sum_of_products_field=None):
else:
raise ValueError("Mean vector field has to be a pystencils Field or Field.Access")
if sum_of_products_field is None:
# sum of products will not be calculated, i.e., the variance is not retrievable
welford_sum_of_products_field = None
if sum_of_squares_field is None:
# sum of squares will not be calculated, i.e., the covariance matrix is not retrievable
welford_sum_of_squares_field = None
else:
if isinstance(sum_of_squares_field, Field):
welford_sum_of_squares_field = sum_of_squares_field.center
elif isinstance(sum_of_squares_field, Field.Access):
welford_sum_of_squares_field = sum_of_squares_field
else:
raise ValueError("Sum of squares field has to be a pystencils Field or Field.Access")
assert welford_sum_of_squares_field.field.values_per_cell() == dim ** 2, \
(f"Sum of squares field does not have the right layout. "
f"Index dimension: {welford_sum_of_squares_field.field.values_per_cell()}, expected: {dim ** 2}")
if sum_of_cubes_field is None:
# sum of cubes will not be calculated, i.e., the 3rd-order central moments are not retrievable
welford_sum_of_cubes_field = None
else:
if isinstance(sum_of_products_field, Field):
welford_sum_of_products_field = sum_of_products_field.center
assert sum_of_products_field.values_per_cell() == dim**2, \
(f"Sum of products field does not have the right layout. "
f"Index dimension: {sum_of_products_field.values_per_cell()}, expected: {dim**2}")
elif isinstance(sum_of_products_field, Field.Access):
welford_sum_of_products_field = sum_of_products_field
assert sum_of_products_field.field.values_per_cell() == dim**2, \
(f"Sum of products field does not have the right layout. "
f"Index dimension: {sum_of_products_field.field.values_per_cell()}, expected: {dim**2}")
if isinstance(sum_of_cubes_field, Field):
welford_sum_of_cubes_field = sum_of_cubes_field.center
elif isinstance(sum_of_cubes_field, Field.Access):
welford_sum_of_cubes_field = sum_of_cubes_field
else:
raise ValueError("Variance vector field has to be a pystencils Field or Field.Access")
raise ValueError("Sum of cubes field has to be a pystencils Field or Field.Access")
assert welford_sum_of_cubes_field.field.values_per_cell() == dim ** 3, \
(f"Sum of cubes field does not have the right layout. "
f"Index dimension: {welford_sum_of_cubes_field.field.values_per_cell()}, expected: {dim ** 3}")
# for the calculation of the third-order moments, the variance must also be calculated
if welford_sum_of_cubes_field is not None:
assert welford_sum_of_squares_field is not None
# actual assignments
counter = sp.Symbol('counter')
delta = sp.symbols(f"delta_:{dim}")
......@@ -67,7 +90,7 @@ def welford_assignments(field, mean_field, sum_of_products_field=None):
main_assignments.append(ps.Assignment(welford_mean_field.at_index(i),
welford_mean_field.at_index(i) + delta[i] / counter))
if sum_of_products_field is not None:
if sum_of_squares_field is not None:
delta2 = sp.symbols(f"delta2_:{dim}")
for i in range(dim):
main_assignments.append(
......@@ -76,7 +99,21 @@ def welford_assignments(field, mean_field, sum_of_products_field=None):
for j in range(dim):
idx = i * dim + j
main_assignments.append(ps.Assignment(
welford_sum_of_products_field.at_index(idx),
welford_sum_of_products_field.at_index(idx) + delta[i] * delta2[j]))
welford_sum_of_squares_field.at_index(idx),
welford_sum_of_squares_field.at_index(idx) + delta[i] * delta2[j]))
if sum_of_cubes_field is not None:
for i in range(dim):
for j in range(dim):
for k in range(dim):
idx = (i * dim + j) * dim + k
main_assignments.append(ps.Assignment(
welford_sum_of_cubes_field.at_index(idx),
welford_sum_of_cubes_field.at_index(idx)
- delta[k] / counter * welford_sum_of_squares_field(i * dim + j)
- delta[j] / counter * welford_sum_of_squares_field(k * dim + i)
- delta[i] / counter * welford_sum_of_squares_field(j * dim + k)
+ delta2[i] * (2 * delta[j] - delta2[j]) * delta[k]
))
return main_assignments
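A minimal post-processing sketch (assuming NumPy arrays gathered from the accumulator fields) for the quantities described in the docstring above; the array and function names are illustrative:

```python
import numpy as np

def welford_postprocess(sum_of_squares, sum_of_cubes, counter, dim):
    """Turn the accumulated Welford sums into (co)variances and 3rd-order central moments."""
    # biased (co)variance per cell: M_{2,n} / n  (use counter - 1 for the unbiased estimate)
    covariance = sum_of_squares.reshape(sum_of_squares.shape[:-1] + (dim, dim)) / counter
    # 3rd-order central moments per cell: sum of cubes divided by n
    third_moments = sum_of_cubes.reshape(sum_of_cubes.shape[:-1] + (dim, dim, dim)) / counter
    return covariance, third_moments
```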
import functools
import sympy as sp
from copy import deepcopy
from lbmpy.simplificationfactory import create_simplification_strategy
from pystencils import create_kernel, CreateKernelConfig
from pystencils import create_kernel, CreateKernelConfig, Assignment
from pystencils.field import Field, get_layout_of_array
from pystencils.enums import Target
from lbmpy.advanced_streaming.utility import get_accessor, Timestep
from lbmpy.relaxationrates import get_shear_relaxation_rate
from lbmpy.utils import second_order_moment_tensor
def pdf_initialization_assignments(lb_method, density, velocity, pdfs,
streaming_pattern='pull', previous_timestep=Timestep.BOTH,
set_pre_collision_pdfs=False):
"""Assignments to initialize the pdf field with equilibrium"""
def get_field_accesses(lb_method, pdfs, streaming_pattern, previous_timestep, pre_collision_pdfs):
if isinstance(pdfs, Field):
accessor = get_accessor(streaming_pattern, previous_timestep)
if set_pre_collision_pdfs:
if pre_collision_pdfs:
field_accesses = accessor.read(pdfs, lb_method.stencil)
else:
field_accesses = accessor.write(pdfs, lb_method.stencil)
elif streaming_pattern == 'pull' and not set_pre_collision_pdfs:
elif streaming_pattern == 'pull' and not pre_collision_pdfs:
field_accesses = pdfs
else:
raise ValueError("Invalid value of pdfs: A PDF field reference is required to derive "
+ f"initialization assignments for streaming pattern {streaming_pattern}.")
return field_accesses
def pdf_initialization_assignments(lb_method, density, velocity, pdfs,
streaming_pattern='pull', previous_timestep=Timestep.BOTH,
set_pre_collision_pdfs=False):
"""Assignments to initialize the pdf field with equilibrium"""
field_accesses = get_field_accesses(lb_method, pdfs, streaming_pattern, previous_timestep, set_pre_collision_pdfs)
if isinstance(density, Field):
density = density.center
if isinstance(velocity, Field):
velocity = velocity.center_vector
......@@ -41,17 +48,9 @@ def pdf_initialization_assignments(lb_method, density, velocity, pdfs,
def macroscopic_values_getter(lb_method, density, velocity, pdfs,
streaming_pattern='pull', previous_timestep=Timestep.BOTH,
use_pre_collision_pdfs=False):
if isinstance(pdfs, Field):
accessor = get_accessor(streaming_pattern, previous_timestep)
if use_pre_collision_pdfs:
field_accesses = accessor.read(pdfs, lb_method.stencil)
else:
field_accesses = accessor.write(pdfs, lb_method.stencil)
elif streaming_pattern == 'pull' and not use_pre_collision_pdfs:
field_accesses = pdfs
else:
raise ValueError("Invalid value of pdfs: A PDF field reference is required to derive "
+ f"getter assignments for streaming pattern {streaming_pattern}.")
field_accesses = get_field_accesses(lb_method, pdfs, streaming_pattern, previous_timestep, use_pre_collision_pdfs)
cqc = lb_method.conserved_quantity_computation
assert not (velocity is None and density is None)
output_spec = {}
......@@ -65,6 +64,27 @@ def macroscopic_values_getter(lb_method, density, velocity, pdfs,
macroscopic_values_setter = pdf_initialization_assignments
def strain_rate_tensor_getter(lb_method, strain_rate_tensor, pdfs, streaming_pattern='pull',
previous_timestep=Timestep.BOTH, use_pre_collision_pdfs=False):
field_accesses = get_field_accesses(lb_method, pdfs, streaming_pattern, previous_timestep, use_pre_collision_pdfs)
if isinstance(strain_rate_tensor, Field):
strain_rate_tensor = strain_rate_tensor.center_vector
omega_s = get_shear_relaxation_rate(lb_method)
equilibrium = lb_method.equilibrium_distribution
rho = equilibrium.density if equilibrium.compressible else equilibrium.background_density
f_neq = sp.Matrix([field_accesses[i] for i in range(lb_method.stencil.Q)]) - lb_method.get_equilibrium_terms()
pi = second_order_moment_tensor(f_neq, lb_method.stencil)
strain_rate_tensor_equ = - 1.5 * (omega_s / rho) * pi
assignments = [Assignment(strain_rate_tensor[i * lb_method.stencil.D + j], strain_rate_tensor_equ[i, j])
for i in range(lb_method.stencil.D) for j in range(lb_method.stencil.D)]
return assignments
def compile_macroscopic_values_getter(lb_method, output_quantities, pdf_arr=None,
ghost_layers=1, iteration_slice=None,
field_layout='numpy', target=Target.CPU,
......
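For context, a hedged usage sketch of the new `strain_rate_tensor_getter`; the D2Q9/SRT setup and field names are illustrative, and the strain rate field needs D*D values per cell:

```python
import pystencils as ps
from lbmpy.creationfunctions import create_lb_method, LBMConfig
from lbmpy.enums import Stencil, Method
from lbmpy.stencils import LBStencil
from lbmpy.macroscopic_value_kernels import strain_rate_tensor_getter

stencil = LBStencil(Stencil.D2Q9)
method = create_lb_method(lbm_config=LBMConfig(stencil=stencil, method=Method.SRT,
                                               relaxation_rate=1.8))

pdfs = ps.fields(f"pdfs({stencil.Q}): double[{stencil.D}D]", layout='fzyx')
strain_rate = ps.fields(f"strain_rate({stencil.D ** 2}): double[{stencil.D}D]", layout='fzyx')

assignments = strain_rate_tensor_getter(method, strain_rate, pdfs)
kernel = ps.create_kernel(assignments).compile()
```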
......@@ -105,22 +105,27 @@ class AbstractLbMethod(abc.ABC):
the subexpressions, that assign the number to the newly introduced symbol
"""
rr = relaxation_rates if relaxation_rates is not None else self.relaxation_rates
unique_relaxation_rates = set()
subexpressions = {}
symbolic_relaxation_rates = list()
for relaxation_rate in rr:
relaxation_rate = sp.sympify(relaxation_rate)
if relaxation_rate not in unique_relaxation_rates:
# special treatment for zero, sp.Zero would be an integer ..
if isinstance(relaxation_rate, sp.Symbol):
symbolic_relaxation_rate = relaxation_rate
else:
if isinstance(relaxation_rate, Zero):
relaxation_rate = 0.0
if not isinstance(relaxation_rate, sp.Symbol):
rt_symbol = sp.Symbol(f"rr_{len(subexpressions)}")
subexpressions[relaxation_rate] = rt_symbol
unique_relaxation_rates.add(relaxation_rate)
if relaxation_rate in subexpressions:
symbolic_relaxation_rate = subexpressions[relaxation_rate]
else:
symbolic_relaxation_rate = sp.Symbol(f"rr_{len(subexpressions)}")
subexpressions[relaxation_rate] = symbolic_relaxation_rate
symbolic_relaxation_rates.append(symbolic_relaxation_rate)
new_rr = [subexpressions[sp.sympify(e)] if sp.sympify(e) in subexpressions else e for e in rr]
substitutions = [Assignment(e[1], e[0]) for e in subexpressions.items()]
if relaxation_rates_modifier is not None:
new_rr = [r * relaxation_rates_modifier for r in new_rr]
symbolic_relaxation_rates = [r * relaxation_rates_modifier for r in symbolic_relaxation_rates]
else:
for srr in symbolic_relaxation_rates:
assert isinstance(srr, sp.Symbol)
return substitutions, sp.diag(*new_rr)
return substitutions, sp.diag(*symbolic_relaxation_rates)
......@@ -172,7 +172,7 @@ class PdfsToMomentsByMatrixTransform(AbstractRawMomentTransform):
# ----------------------------- Private Members -----------------------------
@ property
@property
def _default_simplification(self):
forward_simp = SimplificationStrategy()
# forward_simp.add(substitute_moments_in_conserved_quantity_equations)
......@@ -218,7 +218,7 @@ class PdfsToMomentsByChimeraTransform(AbstractRawMomentTransform):
self.moment_polynomials)
self.poly_to_mono_matrix = self.mono_to_poly_matrix.inv()
@ property
@property
def absorbs_conserved_quantity_equations(self):
return True
......@@ -414,7 +414,7 @@ class PdfsToMomentsByChimeraTransform(AbstractRawMomentTransform):
# ----------------------------- Private Members -----------------------------
@ property
@property
def _default_simplification(self):
from lbmpy.methods.momentbased.momentbasedsimplifications import (
substitute_moments_in_conserved_quantity_equations,
......
......@@ -39,8 +39,8 @@ def circle_intersections(midpoint0, midpoint1, radius0, radius1):
def interface_region(concentration_arr, phase0, phase1, area=3):
import scipy.ndimage as sc_image
area_phase0 = sc_image.morphology.binary_dilation(concentration_arr[..., phase0] > 0.5, iterations=area)
area_phase1 = sc_image.morphology.binary_dilation(concentration_arr[..., phase1] > 0.5, iterations=area)
area_phase0 = sc_image.binary_dilation(concentration_arr[..., phase0] > 0.5, iterations=area)
area_phase1 = sc_image.binary_dilation(concentration_arr[..., phase1] > 0.5, iterations=area)
return np.logical_and(area_phase0, area_phase1)
......
......@@ -366,8 +366,8 @@ def calculate_parameters_taylor_bubble(reference_length=128,
water_mu_lattice_units = math.sqrt(density_diff * density_heavy * grav_df_cube) / inverse_viscosity_number
air_mu_lattice_units = water_mu_lattice_units / (water_mu / air_mu)
dynamic_viscosity_heavy = water_mu_lattice_units / density_heavy
dynamic_viscosity_light = air_mu_lattice_units / density_light
dynamic_viscosity_heavy = water_mu_lattice_units
dynamic_viscosity_light = air_mu_lattice_units
surface_tension_lattice_units = density_diff * gravity_lattice_units * diameter_fluid ** 2 / bond_number
......
......@@ -120,6 +120,26 @@ def create_stream_pull_with_output_kernel(lb_method, src_field, dst_field=None,
simplification_hints=output_eq_collection.simplification_hints)
def create_copy_kernel(stencil, src_field, dst_field, accessor=StreamPullTwoFieldsAccessor()):
"""Creates a copy kernel, which can be used to transfer information from src to dst field.
Args:
stencil: lattice Boltzmann stencil which is used in the form of a tuple of tuples
src_field: field used for reading pdf values
dst_field: field used for writing pdf values
accessor: instance of PdfFieldAccessor, defining where to read and write values;
see 'fieldaccess.PdfFieldAccessor'
Returns:
AssignmentCollection of a copy update rule
"""
temporary_symbols = sp.symbols(f'copied:{stencil.Q}')
subexpressions = [Assignment(tmp, acc) for tmp, acc in zip(temporary_symbols, accessor.write(src_field, stencil))]
main_assignments = [Assignment(acc, tmp) for acc, tmp in zip(accessor.write(dst_field, stencil), temporary_symbols)]
return AssignmentCollection(main_assignments, subexpressions=subexpressions)
# ---------------------------------- Pdf array creation for various layouts --------------------------------------------
......
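A brief usage sketch of the new `create_copy_kernel`; the module path (`lbmpy.updatekernels`) is assumed from the surrounding hunk and the field names are illustrative:

```python
import pystencils as ps
from lbmpy.stencils import LBStencil
from lbmpy.enums import Stencil
from lbmpy.updatekernels import create_copy_kernel  # module path assumed

stencil = LBStencil(Stencil.D2Q9)
src, dst = ps.fields(f"src({stencil.Q}), dst({stencil.Q}): double[{stencil.D}D]", layout='fzyx')

copy_rule = create_copy_kernel(stencil, src, dst)      # AssignmentCollection
copy_kernel = ps.create_kernel(copy_rule).compile()    # transfers all pdfs from src to dst
```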
......@@ -5,36 +5,52 @@ import numpy as np
from lbmpy.stencils import LBStencil
from pystencils.slicing import get_slice_before_ghost_layer, get_ghost_region_slice
from lbmpy.creationfunctions import create_lb_update_rule, LBMConfig, LBMOptimisation
from lbmpy.advanced_streaming.communication import get_communication_slices, _fix_length_one_slices, \
LBMPeriodicityHandling
from lbmpy.advanced_streaming.communication import (
get_communication_slices,
_fix_length_one_slices,
LBMPeriodicityHandling,
periodic_pdf_gpu_copy_kernel,
)
from lbmpy.advanced_streaming.utility import streaming_patterns, Timestep
from lbmpy.enums import Stencil
import pytest
@pytest.mark.parametrize('stencil', [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27])
@pytest.mark.parametrize('streaming_pattern', streaming_patterns)
@pytest.mark.parametrize('timestep', [Timestep.EVEN, Timestep.ODD])
@pytest.mark.parametrize(
"stencil", [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27]
)
@pytest.mark.parametrize("streaming_pattern", streaming_patterns)
@pytest.mark.parametrize("timestep", [Timestep.EVEN, Timestep.ODD])
def test_slices_not_empty(stencil, streaming_pattern, timestep):
stencil = LBStencil(stencil)
arr = np.zeros((4,) * stencil.D + (stencil.Q,))
slices = get_communication_slices(stencil, streaming_pattern=streaming_pattern, prev_timestep=timestep,
ghost_layers=1)
slices = get_communication_slices(
stencil,
streaming_pattern=streaming_pattern,
prev_timestep=timestep,
ghost_layers=1,
)
for _, slices_list in slices.items():
for src, dst in slices_list:
assert all(s != 0 for s in arr[src].shape)
assert all(s != 0 for s in arr[dst].shape)
@pytest.mark.parametrize('stencil', [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27])
@pytest.mark.parametrize('streaming_pattern', streaming_patterns)
@pytest.mark.parametrize('timestep', [Timestep.EVEN, Timestep.ODD])
@pytest.mark.parametrize(
"stencil", [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27]
)
@pytest.mark.parametrize("streaming_pattern", streaming_patterns)
@pytest.mark.parametrize("timestep", [Timestep.EVEN, Timestep.ODD])
def test_src_dst_same_shape(stencil, streaming_pattern, timestep):
stencil = LBStencil(stencil)
arr = np.zeros((4,) * stencil.D + (stencil.Q,))
slices = get_communication_slices(stencil, streaming_pattern=streaming_pattern, prev_timestep=timestep,
ghost_layers=1)
slices = get_communication_slices(
stencil,
streaming_pattern=streaming_pattern,
prev_timestep=timestep,
ghost_layers=1,
)
for _, slices_list in slices.items():
for src, dst in slices_list:
src_shape = arr[src].shape
......@@ -42,12 +58,15 @@ def test_src_dst_same_shape(stencil, streaming_pattern, timestep):
assert src_shape == dst_shape
@pytest.mark.parametrize('stencil', [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27])
@pytest.mark.parametrize(
"stencil", [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27]
)
def test_pull_communication_slices(stencil):
stencil = LBStencil(stencil)
slices = get_communication_slices(
stencil, streaming_pattern='pull', prev_timestep=Timestep.BOTH, ghost_layers=1)
stencil, streaming_pattern="pull", prev_timestep=Timestep.BOTH, ghost_layers=1
)
for i, d in enumerate(stencil):
if i == 0:
continue
......@@ -58,21 +77,115 @@ def test_pull_communication_slices(stencil):
dst = s[1][:-1]
break
inner_slice = _fix_length_one_slices(get_slice_before_ghost_layer(d, ghost_layers=1))
inner_slice = _fix_length_one_slices(
get_slice_before_ghost_layer(d, ghost_layers=1)
)
inv_dir = (-e for e in d)
gl_slice = _fix_length_one_slices(get_ghost_region_slice(inv_dir, ghost_layers=1))
gl_slice = _fix_length_one_slices(
get_ghost_region_slice(inv_dir, ghost_layers=1)
)
assert src == inner_slice
assert dst == gl_slice
@pytest.mark.parametrize('stencil_name', [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27])
@pytest.mark.parametrize("direction", LBStencil(Stencil.D3Q27).stencil_entries)
@pytest.mark.parametrize("pull", [False, True])
def test_gpu_comm_kernels(direction: tuple, pull: bool):
pytest.importorskip("cupy")
stencil = LBStencil(Stencil.D3Q27)
inv_dir = stencil[stencil.inverse_index(direction)]
target = ps.Target.GPU
domain_size = (4,) * stencil.D
dh: ps.datahandling.SerialDataHandling = ps.create_data_handling(
domain_size,
periodicity=(True,) * stencil.D,
parallel=False,
default_target=target,
)
field = dh.add_array("field", values_per_cell=2)
if pull:
dst_slice = get_ghost_region_slice(inv_dir)
src_slice = get_slice_before_ghost_layer(direction)
else:
dst_slice = get_slice_before_ghost_layer(direction)
src_slice = get_ghost_region_slice(inv_dir)
src_slice += (1,)
dst_slice += (1,)
kernel = periodic_pdf_gpu_copy_kernel(field, src_slice, dst_slice)
dh.cpu_arrays[field.name][src_slice] = 42.0
dh.all_to_gpu()
dh.run_kernel(kernel)
dh.all_to_cpu()
np.testing.assert_equal(dh.cpu_arrays[field.name][dst_slice], 42.0)
@pytest.mark.parametrize("stencil", [Stencil.D2Q9, Stencil.D3Q19])
@pytest.mark.parametrize("streaming_pattern", streaming_patterns)
def test_direct_copy_and_kernels_equivalence(stencil: Stencil, streaming_pattern: str):
pytest.importorskip("cupy")
target = ps.Target.GPU
stencil = LBStencil(stencil)
domain_size = (4,) * stencil.D
dh: ps.datahandling.SerialDataHandling = ps.create_data_handling(
domain_size,
periodicity=(True,) * stencil.D,
parallel=False,
default_target=target,
)
pdfs_a = dh.add_array("pdfs_a", values_per_cell=stencil.Q)
pdfs_b = dh.add_array("pdfs_b", values_per_cell=stencil.Q)
dh.fill(pdfs_a.name, 0.0, ghost_layers=True)
dh.fill(pdfs_b.name, 0.0, ghost_layers=True)
for q in range(stencil.Q):
sl = ps.make_slice[:4, :4, q] if stencil.D == 2 else ps.make_slice[:4, :4, :4, q]
dh.cpu_arrays[pdfs_a.name][sl] = q
dh.cpu_arrays[pdfs_b.name][sl] = q
dh.all_to_gpu()
direct_copy = LBMPeriodicityHandling(stencil, dh, pdfs_a.name, streaming_pattern, cupy_direct_copy=True)
kernels_copy = LBMPeriodicityHandling(stencil, dh, pdfs_b.name, streaming_pattern, cupy_direct_copy=False)
direct_copy(Timestep.EVEN)
kernels_copy(Timestep.EVEN)
dh.all_to_cpu()
np.testing.assert_equal(
dh.cpu_arrays[pdfs_a.name],
dh.cpu_arrays[pdfs_b.name]
)
@pytest.mark.parametrize(
"stencil_name", [Stencil.D2Q9, Stencil.D3Q15, Stencil.D3Q19, Stencil.D3Q27]
)
def test_optimised_and_full_communication_equivalence(stencil_name):
target = ps.Target.CPU
stencil = LBStencil(stencil_name)
domain_size = (4, ) * stencil.D
domain_size = (4,) * stencil.D
dh = ps.create_data_handling(domain_size, periodicity=(True, ) * stencil.D,
parallel=False, default_target=target)
dh = ps.create_data_handling(
domain_size,
periodicity=(True,) * stencil.D,
parallel=False,
default_target=target,
)
pdf = dh.add_array("pdf", values_per_cell=len(stencil), dtype=np.int64)
dh.fill("pdf", 0, ghost_layers=True)
......@@ -82,9 +195,9 @@ def test_optimised_and_full_communication_equivalence(stencil_name):
gl = dh.ghost_layers_of_field("pdf")
num = 0
for idx, x in np.ndenumerate(dh.cpu_arrays['pdf']):
dh.cpu_arrays['pdf'][idx] = num
dh.cpu_arrays['pdf_tmp'][idx] = num
for idx, x in np.ndenumerate(dh.cpu_arrays["pdf"]):
dh.cpu_arrays["pdf"][idx] = num
dh.cpu_arrays["pdf_tmp"][idx] = num
num += 1
lbm_config = LBMConfig(stencil=stencil, kernel_type="stream_pull_only")
......@@ -95,21 +208,27 @@ def test_optimised_and_full_communication_equivalence(stencil_name):
ast = ps.create_kernel(ac, config=config)
stream = ast.compile()
full_communication = dh.synchronization_function(pdf.name, target=dh.default_target, optimization={"openmp": True})
full_communication = dh.synchronization_function(
pdf.name, target=dh.default_target, optimization={"openmp": True}
)
full_communication()
dh.run_kernel(stream)
dh.swap("pdf", "pdf_tmp")
pdf_full_communication = np.copy(dh.cpu_arrays['pdf'])
pdf_full_communication = np.copy(dh.cpu_arrays["pdf"])
num = 0
for idx, x in np.ndenumerate(dh.cpu_arrays['pdf']):
dh.cpu_arrays['pdf'][idx] = num
dh.cpu_arrays['pdf_tmp'][idx] = num
for idx, x in np.ndenumerate(dh.cpu_arrays["pdf"]):
dh.cpu_arrays["pdf"][idx] = num
dh.cpu_arrays["pdf_tmp"][idx] = num
num += 1
optimised_communication = LBMPeriodicityHandling(stencil=stencil, data_handling=dh, pdf_field_name=pdf.name,
streaming_pattern='pull')
optimised_communication = LBMPeriodicityHandling(
stencil=stencil,
data_handling=dh,
pdf_field_name=pdf.name,
streaming_pattern="pull",
)
optimised_communication()
dh.run_kernel(stream)
dh.swap("pdf", "pdf_tmp")
......@@ -119,9 +238,14 @@ def test_optimised_and_full_communication_equivalence(stencil_name):
for j in range(gl, domain_size[1]):
for k in range(gl, domain_size[2]):
for f in range(len(stencil)):
assert dh.cpu_arrays['pdf'][i, j, k, f] == pdf_full_communication[i, j, k, f], print(f)
assert (
dh.cpu_arrays["pdf"][i, j, k, f]
== pdf_full_communication[i, j, k, f]
), print(f)
else:
for i in range(gl, domain_size[0]):
for j in range(gl, domain_size[1]):
for f in range(len(stencil)):
assert dh.cpu_arrays['pdf'][i, j, f] == pdf_full_communication[i, j, f]
assert (
dh.cpu_arrays["pdf"][i, j, f] == pdf_full_communication[i, j, f]
)
......@@ -202,19 +202,19 @@ def create_full_parameter_study():
config = CreateKernelConfig(target=Target.CPU)
methods = [LBMConfig(method=Method.TRT, relaxation_rates=[omega, 1]),
LBMConfig(method=Method.MRT, relaxation_rates=[omega], equilibrium_order=2),
LBMConfig(method=Method.MRT, relaxation_rates=[omega], equilibrium_order=3),
LBMConfig(method=Method.CUMULANT, relaxation_rates=[omega], # cumulant
methods = [LBMConfig(stencil=LBStencil(Stencil.D3Q27), method=Method.TRT, relaxation_rates=[omega, 1]),
LBMConfig(stencil=LBStencil(Stencil.D3Q27), method=Method.MRT, relaxation_rates=[omega],
equilibrium_order=2),
LBMConfig(stencil=LBStencil(Stencil.D3Q27), method=Method.MRT, relaxation_rates=[omega],
equilibrium_order=3),
LBMConfig(stencil=LBStencil(Stencil.D3Q27), method=Method.CUMULANT, relaxation_rates=[omega], # cumulant
compressible=True, continuous_equilibrium=True, equilibrium_order=3),
LBMConfig(method=Method.CUMULANT, relaxation_rates=[omega], # cumulant with fourth-order correction
LBMConfig(stencil=LBStencil(Stencil.D3Q27), method=Method.CUMULANT, relaxation_rates=[omega], # cumulant with fourth-order correction
compressible=True, continuous_equilibrium=True, fourth_order_correction=0.1),
LBMConfig(method=Method.TRT_KBC_N4, relaxation_rates=[omega, omega_f], entropic=True,
zero_centered=False, # entropic order 2
continuous_equilibrium=True, equilibrium_order=2),
LBMConfig(method=Method.TRT_KBC_N4, relaxation_rates=[omega, omega_f], entropic=True,
zero_centered=False, # entropic order 3
continuous_equilibrium=True, equilibrium_order=3),
LBMConfig(stencil=LBStencil(Stencil.D3Q27), method=Method.TRT_KBC_N4, relaxation_rates=[omega, omega_f],
entropic=True, zero_centered=False, continuous_equilibrium=True, equilibrium_order=2),
LBMConfig(stencil=LBStencil(Stencil.D3Q27), method=Method.TRT_KBC_N4, relaxation_rates=[omega, omega_f],
entropic=True, zero_centered=False, continuous_equilibrium=True, equilibrium_order=3),
# entropic cumulant: not supported for the moment
# LBMConfig(method=Method.CUMULANT, relaxation_rates=["omega", "omega_f"], entropic=True, zero_centered=False,
......
This source diff could not be displayed because it is too large.
%% Cell type:code id: tags:
``` python
import pytest
pytest.importorskip('cupy')
```
%% Output
<module 'cupy' from '/home/markus/.local/lib/python3.11/site-packages/cupy/__init__.py'>
%% Cell type:code id: tags:
``` python
from lbmpy.session import *
from lbmpy.phasefield.n_phase_boyer import *
from lbmpy.phasefield.kerneleqs import *
from lbmpy.phasefield.contact_angle_circle_fitting import *
from scipy.ndimage.filters import gaussian_filter
from scipy.ndimage import gaussian_filter
from pystencils.simp import sympy_cse_on_assignment_list
one = sp.sympify(1)
import pyximport
pyximport.install(language_level=3)
from lbmpy.phasefield.simplex_projection import simplex_projection_2d # NOQA
```
%% Cell type:markdown id: tags:
# Simulation arbitrary surface tension case
%% Cell type:code id: tags:
``` python
n = 4
dx, dt = 1, 1
mobility = 2e-3
domain_size = (150, 150)
ε = one * 4
penalty_factor = 0
stabilization_factor = 10
κ = (one, one/2, one/3, one/4)
sigma_factor = one / 15
σ = sp.ImmutableDenseMatrix(n, n, lambda i,j: sigma_factor* (κ[i] + κ[j]) if i != j else 0 )
```
%% Cell type:code id: tags:
``` python
dh = create_data_handling(domain_size, periodicity=True, default_target=ps.Target.GPU)
c = dh.add_array('c', values_per_cell=n)
c_tmp = dh.add_array_like('c_tmp', 'c')
μ = dh.add_array('mu', values_per_cell=n)
cvec = c.center_vector
μvec = μ.center_vector
```
%% Cell type:code id: tags:
``` python
α, _ = diffusion_coefficients(σ)
f = lambda c: c**2 * ( 1 - c ) **2
a, b = compute_ab(f)
capital_f = capital_f0(cvec, σ) + correction_g(cvec, σ) + stabilization_factor * stabilization_term(cvec, α)
f_bulk = free_energy_bulk(capital_f, b, ε) + penalty_factor * (one - sum(cvec))
f_if = free_energy_interfacial(cvec, σ, a, ε)
f = f_bulk + f_if
```
%% Cell type:code id: tags:
``` python
#f_bulk
```
%% Cell type:code id: tags:
``` python
μ_assignments = mu_kernel(f, cvec, c, μ)
μ_assignments = [Assignment(a.lhs, a.rhs.doit()) for a in μ_assignments]
μ_assignments = sympy_cse_on_assignment_list(μ_assignments)
```
%% Cell type:code id: tags:
``` python
discretize = fd.Discretization2ndOrder(dx=dx, dt=dt)
```
%% Cell type:code id: tags:
``` python
def lapl(e):
return sum(ps.fd.diff(e, d, d) for d in range(dh.dim))
```
%% Cell type:code id: tags:
``` python
rhs = α * μvec
discretized_rhs = [discretize(fd.expand_diff_full( lapl(mobility * rhs_i) + fd.transient(cvec[i], idx=i), functions=μvec))
for i, rhs_i in enumerate(rhs)]
c_assignments = [Assignment(lhs, rhs)
for lhs, rhs in zip(c_tmp.center_vector, discretized_rhs)]
```
%% Cell type:code id: tags:
``` python
#c_assignments
```
%% Cell type:code id: tags:
``` python
μ_sync = dh.synchronization_function(μ.name)
c_sync = dh.synchronization_function(c.name)
optimization = {'cpu_openmp': False, 'cpu_vectorize_info': None}
config = ps.CreateKernelConfig(cpu_openmp=False, target=dh.default_target)
μ_kernel = create_kernel(μ_assignments, config=config).compile()
c_kernel = create_kernel(c_assignments, config=config).compile()
def set_c(slice_obj, values):
for block in dh.iterate(slice_obj):
arr = block[c.name]
arr[..., : ] = values
def smooth():
for block in dh.iterate(ghost_layers=True):
c_arr = block[c.name]
for i in range(n):
gaussian_filter(c_arr[..., i], sigma=2, output=c_arr[..., i])
def time_loop(steps):
dh.all_to_gpu()
for t in range(steps):
c_sync()
dh.run_kernel(μ_kernel)
μ_sync()
dh.run_kernel(c_kernel)
dh.swap(c.name, c_tmp.name)
#simplex_projection_2d(dh.cpu_arrays[c.name])
dh.all_to_cpu()
```
%% Cell type:code id: tags:
``` python
set_c(make_slice[:, :], [0, 0, 0, 0])
set_c(make_slice[:, 0.5:], [1, 0, 0, 0])
set_c(make_slice[:, :0.5], [0, 1, 0, 0])
set_c(make_slice[0.3:0.7, 0.3:0.7], [0, 0, 1, 0])
smooth()
```
%% Cell type:code id: tags:
``` python
#dh.load_all('n_phases_state_size200_stab10.npz')
```
%% Cell type:code id: tags:
``` python
plt.phase_plot(dh.gather_array(c.name))
```
%% Cell type:code id: tags:
``` python
neumann_angles_from_surface_tensions(lambda i, j: float(σ[i, j]))
```
%% Cell type:code id: tags:
``` python
import time
for i in range(10):
start = time.perf_counter()
time_loop(1_000)
end = time.perf_counter()
try:
print(i, end - start, liquid_lens_neumann_angles(dh.gather_array(c.name)))
except Exception:
print(i, end - start, "none found")
```
%% Cell type:code id: tags:
``` python
plt.subplot(1,3,1)
t = dh.gather_array(c.name, make_slice[25, :]).squeeze()
plt.plot(t);
plt.subplot(1,3,2)
plt.phase_plot(dh.gather_array(c.name), linewidth=1)
plt.subplot(1,3,3)
plt.scalar_field(dh.gather_array(μ.name)[:, :, 2])
plt.colorbar();
```
%% Cell type:code id: tags:
``` python
assert not np.isnan(dh.max(c.name))
```
%% Cell type:code id: tags:
``` python
t = dh.gather_array(c.name, make_slice[25, 55:90]).squeeze()
plt.hlines(0.5, 0, 30)
plt.plot(t);
```
......
......@@ -44,6 +44,6 @@ def test_analytical():
assert np.isclose(parameters.density_heavy, 1.0)
assert np.isclose(parameters.density_light, 0.001207114228456914)
assert np.isclose(parameters.dynamic_viscosity_heavy, 5.733727652152216e-05)
assert np.isclose(parameters.dynamic_viscosity_light, 0.0008630017037694861)
assert np.isclose(parameters.dynamic_viscosity_light, 1.0417416358027054e-06)
assert np.isclose(parameters.gravitational_acceleration, -7.407407407407407e-08)
assert np.isclose(parameters.surface_tension, 3.149857262258028e-05, rtol=1e-05)
import numpy as np
import pytest
from lbmpy.boundaries import NoSlip, UBB, SimpleExtrapolationOutflow, ExtrapolationOutflow, \
FixedDensity, DiffusionDirichlet, NeumannByCopy, StreamInConstant, FreeSlip
from lbmpy.boundaries.boundaryhandling import LatticeBoltzmannBoundaryHandling
from lbmpy.boundaries import (NoSlip, NoSlipLinearBouzidi, QuadraticBounceBack,
UBB, SimpleExtrapolationOutflow, ExtrapolationOutflow, FixedDensity, DiffusionDirichlet,
NeumannByCopy, StreamInConstant, FreeSlip)
from lbmpy.boundaries.boundaryhandling import LatticeBoltzmannBoundaryHandling, create_lattice_boltzmann_boundary_kernel
from lbmpy.creationfunctions import create_lb_function, create_lb_method, LBMConfig
from lbmpy.enums import Stencil, Method
from lbmpy.geometry import add_box_boundary
from lbmpy.lbstep import LatticeBoltzmannStep
from lbmpy.stencils import LBStencil
import pystencils as ps
from pystencils import create_data_handling, make_slice, Target, CreateKernelConfig
from pystencils.slicing import slice_from_direction
from pystencils.stencil import inverse_direction
......@@ -435,3 +437,45 @@ def test_boundary_utility_functions():
assert stream == StreamInConstant(constant=1.0, name="stream")
assert not stream == StreamInConstant(constant=1.0, name="test")
assert not stream == noslip
@pytest.mark.parametrize("given_force_vector", [True, False])
@pytest.mark.parametrize("dtype", ["float32", "float64"])
def test_force_on_boundary(given_force_vector, dtype):
stencil = LBStencil(Stencil.D2Q9)
pdfs = ps.fields(f"pdfs_src({stencil.Q}): {dtype}[{stencil.D}D]", layout='fzyx')
method = create_lb_method(lbm_config=LBMConfig(stencil=stencil, method=Method.SRT, relaxation_rate=1.8))
noslip = NoSlip(name="noslip", calculate_force_on_boundary=True)
bouzidi = NoSlipLinearBouzidi(name="bouzidi", calculate_force_on_boundary=True)
qq_bounce_Back = QuadraticBounceBack(name="qqBB", relaxation_rate=1.8, calculate_force_on_boundary=True)
boundary_objects = [noslip, bouzidi, qq_bounce_Back]
for boundary in boundary_objects:
if given_force_vector:
force_vector_type = np.dtype([(f"F_{i}", dtype) for i in range(stencil.D)], align=True)
force_vector = ps.Field('forceVector', ps.FieldType.INDEXED, force_vector_type, layout=[0],
shape=(ps.TypedSymbol("forceVectorSize", "int32"), 1), strides=(1, 1))
else:
force_vector = None
index_struct_dtype = _numpy_data_type_for_boundary_object(boundary, stencil.D)
index_field = ps.Field('indexVector', ps.FieldType.INDEXED, index_struct_dtype, layout=[0],
shape=(ps.TypedSymbol("indexVectorSize", "int32"), 1), strides=(1, 1))
create_lattice_boltzmann_boundary_kernel(pdfs, index_field, method, boundary, force_vector=force_vector)
def _numpy_data_type_for_boundary_object(boundary_object, dim):
boundary_index_array_coordinate_names = ["x", "y", "z"]
direction_member_name = "dir"
default_index_array_dtype = np.int32
coordinate_names = boundary_index_array_coordinate_names[:dim]
return np.dtype(
[(name, default_index_array_dtype) for name in coordinate_names]
+ [(direction_member_name, default_index_array_dtype)]
+ [(i[0], i[1].numpy_dtype) for i in boundary_object.additional_data],
align=True,)