Compare revisions

cf17a9b9 · cf17a9b9 · cf17a9b9 · cf17a9b9 · cf17a9b9 · d3f62364
--- a/lbmpy_tests/test_split_optimization.py
+++ b/lbmpy_tests/test_split_optimization.py
-import numpy as np
-import pytest
-
-from lbmpy.creationfunctions import create_lb_ast
-from lbmpy.scenarios import create_lid_driven_cavity
-from pystencils.sympyextensions import count_operations_in_ast
-from sympy.core.cache import clear_cache
-
-
-def test_split_number_of_operations():
-    # For the following configurations the number of operations for splitted and un-splitted version are
-    # exactly equal. This is not true for D3Q15 and D3Q27 because some sub-expressions are computed in multiple
-    # splitted, inner loops.
-    for stencil in ['D2Q9', 'D3Q19']:
-        for compressible in (True, False):
-            for method in ('srt', 'trt'):
-                common_params = {'stencil': stencil,
-                                 'method': method,
-                                 'compressible': compressible,
-                                 'force_model': 'luo',
-                                 'force': (1e-6, 1e-5, 1e-7)
-                                 }
-                ast_with_splitting = create_lb_ast(optimization={'split': True}, **common_params)
-                ast_without_splitting = create_lb_ast(optimization={'split': False}, **common_params)
-
-                op_with_splitting = count_operations_in_ast(ast_with_splitting)
-                op_without_splitting = count_operations_in_ast(ast_without_splitting)
-                assert op_without_splitting['muls'] == op_with_splitting['muls']
-                assert op_without_splitting['adds'] == op_with_splitting['adds']
-                assert op_without_splitting['divs'] == op_with_splitting['divs']
-
-
-@pytest.mark.parametrize('stencil', ['D2Q9', 'D3Q15', 'D3Q19', 'D3Q27'])
-@pytest.mark.parametrize('compressible', [True, False])
-@pytest.mark.parametrize('method', ['srt', 'mrt'])
-@pytest.mark.parametrize('force', [(0, 0, 0), (1e-6, 1e-7, 2e-6)])
-@pytest.mark.longrun
-def test_equivalence(stencil, compressible, method, force):
-    relaxation_rates = [1.8, 1.7, 1.0, 1.0, 1.0, 1.0]
-    clear_cache()
-    common_params = {'domain_size': (10, 20) if stencil.startswith('D2') else (5, 10, 7),
-                     'stencil': stencil,
-                     'method': method,
-                     'weighted': True,
-                     'compressible': compressible,
-                     'force': force,
-                     'force_model': 'schiller',
-                     'relaxation_rates': relaxation_rates}
-    print("Running Scenario", common_params)
-    with_split = create_lid_driven_cavity(optimization={'split': True}, **common_params)
-    without_split = create_lid_driven_cavity(optimization={'split': False}, **common_params)
-    with_split.run(100)
-    without_split.run(100)
-    np.testing.assert_almost_equal(with_split.velocity_slice(), without_split.velocity_slice())
-
-
-def test_equivalence_short():
-    relaxation_rates = [1.8, 1.7, 1.0, 1.0, 1.0, 1.0]
-    for stencil, compressible, method, force in [('D2Q9', True, 'srt', 1e-7), ('D3Q19', False, 'mrt', 0)]:
-        dim = int(stencil[1])
-        common_params = {'domain_size': (20, 30) if stencil.startswith('D2') else (10, 13, 7),
-                         'stencil': stencil,
-                         'method': method,
-                         'weighted': True,
-                         'compressible': compressible,
-                         'force': (force, 0, 0)[:dim],
-                         'relaxation_rates': relaxation_rates}
-        print("Running Scenario", common_params)
-        with_split = create_lid_driven_cavity(optimization={'split': True}, **common_params)
-        without_split = create_lid_driven_cavity(optimization={'split': False}, **common_params)
-        with_split.run(100)
-        without_split.run(100)
-        np.testing.assert_almost_equal(with_split.velocity_slice(), without_split.velocity_slice())
--- a/lbmpy_tests/test_stencils.py
+++ b/lbmpy_tests/test_stencils.py
-import itertools
-import warnings
-
-import pytest
-import sympy as sp
-
-import lbmpy.stencils as s
-import pystencils as ps
-from lbmpy.stencils import get_stencil
-
-
-def get_3d_stencils():
-    return s.get_stencil('D3Q15'), s.get_stencil('D3Q19'), s.get_stencil('D3Q27')
-
-
-def get_all_stencils():
-    return [
-        s.get_stencil('D2Q9', 'walberla'),
-        s.get_stencil('D3Q15', 'walberla'),
-        s.get_stencil('D3Q19', 'walberla'),
-        s.get_stencil('D3Q27', 'walberla'),
-
-        s.get_stencil('D2Q9', 'counterclockwise'),
-
-        s.get_stencil('D2Q9', 'braunschweig'),
-        s.get_stencil('D3Q19', 'braunschweig'),
-
-        s.get_stencil('D3Q27', 'premnath'),
-
-        s.get_stencil("D3Q27", "fakhari"),
-    ]
-
-
-def test_sizes():
-    assert len(s.get_stencil('D2Q9')) == 9
-    assert len(s.get_stencil('D3Q15')) == 15
-    assert len(s.get_stencil('D3Q19')) == 19
-    assert len(s.get_stencil('D3Q27')) == 27
-
-
-def test_dimensionality():
-    for d in s.get_stencil('D2Q9'):
-        assert len(d) == 2
-
-    for d in itertools.chain(*get_3d_stencils()):
-        assert len(d) == 3
-
-
-def test_uniqueness():
-    for stencil in get_3d_stencils():
-        direction_set = set(stencil)
-        assert len(direction_set) == len(stencil)
-
-
-def test_run_self_check():
-    for st in get_all_stencils():
-        assert ps.stencil.is_valid(st, max_neighborhood=1)
-        assert ps.stencil.is_symmetric(st)
-
-
-def test_inverse_direction():
-    assert ps.stencil.inverse_direction((1, 0, -1)), (-1, 0 == 1)
-
-
-def test_free_functions():
-    assert not ps.stencil.is_symmetric([(1, 0), (0, 1)])
-    assert not ps.stencil.is_valid([(1, 0), (1, 1, 0)])
-    assert not ps.stencil.is_valid([(2, 0), (0, 1)], max_neighborhood=1)
-
-    with pytest.raises(ValueError) as e:
-        get_stencil("name_that_does_not_exist")
-    assert "No such stencil" in str(e.value)
-
-
-def test_visualize():
-    import matplotlib.pyplot as plt
-    plt.clf()
-    plt.cla()
-
-    d2q9, d3q19 = get_stencil("D2Q9"), get_stencil("D3Q19")
-    figure = plt.gcf()
-    with warnings.catch_warnings():
-        warnings.simplefilter("ignore")
-        ps.stencil.plot(d2q9, figure=figure, data=[str(i) for i in range(9)])
-        ps.stencil.plot(d3q19, figure=figure, data=sp.symbols("a_:19"))
--- a/lbmpy_tests/test_stokes_setup.ipynb
+++ b/lbmpy_tests/test_stokes_setup.ipynb
--- a/lbmpy_tests/test_vectorization.py
+++ b/lbmpy_tests/test_vectorization.py
-import numpy as np
-import pytest
-
-from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets
-from lbmpy.scenarios import create_lid_driven_cavity
-
-
-@pytest.mark.skipif(not get_supported_instruction_sets(), reason='cannot detect CPU instruction set')
-def test_lbm_vectorization_short():
-    print("Computing reference solutions")
-    size1 = (64, 32)
-    relaxation_rate = 1.8
-
-    ldc1_ref = create_lid_driven_cavity(size1, relaxation_rate=relaxation_rate)
-    ldc1_ref.run(10)
-
-    ldc1 = create_lid_driven_cavity(size1, relaxation_rate=relaxation_rate,
-                                    optimization={
-                                        'vectorization': {'instruction_set': get_supported_instruction_sets()[-1],
-                                                          'assume_aligned': True,
-                                                          'nontemporal': True,
-                                                          'assume_inner_stride_one': True,
-                                                          'assume_sufficient_line_padding': False,
-                                                          }},
-                                    fixed_loop_sizes=False)
-    ldc1.run(10)
-
-
-@pytest.mark.parametrize('instruction_set', get_supported_instruction_sets())
-@pytest.mark.parametrize('aligned_and_padding', [[False, False], [True, False], [True, True]])
-@pytest.mark.parametrize('nontemporal', [False, True])
-@pytest.mark.parametrize('double_precision', [False, True])
-@pytest.mark.parametrize('fixed_loop_sizes', [False, True])
-@pytest.mark.longrun
-def test_lbm_vectorization(instruction_set, aligned_and_padding, nontemporal, double_precision, fixed_loop_sizes):
-    vectorization_options = {'instruction_set': instruction_set,
-                             'assume_aligned': aligned_and_padding[0],
-                             'nontemporal': nontemporal,
-                             'assume_inner_stride_one': True,
-                             'assume_sufficient_line_padding': aligned_and_padding[1]}
-    time_steps = 100
-    size1 = (64, 32)
-    size2 = (666, 34)
-    relaxation_rate = 1.8
-
-    print("Computing reference solutions")
-    ldc1_ref = create_lid_driven_cavity(size1, relaxation_rate=relaxation_rate)
-    ldc1_ref.run(time_steps)
-    ldc2_ref = create_lid_driven_cavity(size2, relaxation_rate=relaxation_rate)
-    ldc2_ref.run(time_steps)
-
-    optimization = {'double_precision': double_precision,
-                    'vectorization': vectorization_options,
-                    'cse_global': True,
-                    }
-    print("Vectorization test, double precision {}, vectorization {}, fixed loop sizes {}".format(
-        double_precision, vectorization_options, fixed_loop_sizes))
-    ldc1 = create_lid_driven_cavity(size1, relaxation_rate=relaxation_rate, optimization=optimization,
-                                    fixed_loop_sizes=fixed_loop_sizes)
-    ldc1.run(time_steps)
-    np.testing.assert_almost_equal(ldc1_ref.velocity[:, :], ldc1.velocity[:, :])
-
-    optimization['split'] = True
-    ldc2 = create_lid_driven_cavity(size2, relaxation_rate=relaxation_rate, optimization=optimization,
-                                    fixed_loop_sizes=fixed_loop_sizes)
-    ldc2.run(time_steps)
-    np.testing.assert_almost_equal(ldc2_ref.velocity[:, :], ldc2.velocity[:, :])
-
-
-if __name__ == '__main__':
-    test_lbm_vectorization()
--- a/lbmpy_tests/walberla_scenario_setup.py
+++ b/lbmpy_tests/walberla_scenario_setup.py
-import pytest
-pytest.importorskip('waLBerla.field')
-
-import waLBerla.field as field
-from waLBerla import createUniformBlockGrid, createUniformBufferedScheme, makeSlice
-
-
-def create_walberla_lattice_model(stencil, method, relaxation_rates, compressible=False, order=2,
-                                  force_model='none', force=(0, 0, 0), **_):
-    from waLBerla import lbm
-
-    if method.lower() == 'srt':
-        collision_model = lbm.collisionModels.SRT(relaxation_rates[0])
-    elif method.lower() == 'trt':
-        collision_model = lbm.collisionModels.TRT(relaxation_rates[0], relaxation_rates[1])
-    elif method.lower() == 'mrt':
-        if stencil != 'D3Q19':
-            raise ValueError("MRT is available for D3Q19 only in walberla")
-        collision_model = lbm.collisionModels.D3Q19MRT(*relaxation_rates[1:7])
-    else:
-        raise ValueError("Unknown method: " + str(method))
-
-    if len(force) == 2:
-        force = (force[0], force[1], 0)
-
-    if force_model is None or force_model.lower() == 'none':
-        force_model = lbm.forceModels.NoForce()
-    elif force_model.lower() == 'simple':
-        force_model = lbm.forceModels.SimpleConstant(force)
-    elif force_model.lower() == 'luo':
-        force_model = lbm.forceModels.LuoConstant(force)
-    elif force_model.lower() == 'guo':
-        force_model = lbm.forceModels.GuoConstant(force)
-    else:
-        raise ValueError("Unknown force model")
-    return lbm.makeLatticeModel(stencil, collision_model, force_model, compressible, order)
-
-
-def create_force_driven_channel_2d(force, radius, length, **kwargs):
-    from waLBerla import lbm
-
-    kwargs['force'] = tuple([force, 0, 0])
-
-    domain_size = (length, 2 * radius, 1)
-
-    lattice_model = create_walberla_lattice_model(**kwargs)
-    blocks = createUniformBlockGrid(cells=domain_size, periodic=(1, 0, 1))
-
-    # Adding fields
-    lbm.addPdfFieldToStorage(blocks, "pdfs", lattice_model, velocityAdaptor="vel", densityAdaptor="rho",
-                             initialDensity=1.0)
-    field.addFlagFieldToStorage(blocks, 'flags')
-    lbm.addBoundaryHandlingToStorage(blocks, 'boundary', 'pdfs', 'flags')
-
-    # Communication
-    communication = createUniformBufferedScheme(blocks, lattice_model.communicationStencilName)
-    communication.addDataToCommunicate(field.createPackInfo(blocks, 'pdfs'))
-
-    # Setting boundaries
-    for block in blocks:
-        b = block['boundary']
-        if block.atDomainMaxBorder[1]:  # N
-            b.forceBoundary('NoSlip', makeSlice[:, -1, :, 'g'])
-        if block.atDomainMinBorder[1]:  # S
-            b.forceBoundary('NoSlip', makeSlice[:, 0, :, 'g'])
-
-        b.fillWithDomain()
-
-    sweep = lbm.makeCellwiseSweep(blocks, "pdfs", flagFieldID='flags', flagList=['fluid']).streamCollide
-
-    def time_loop(time_steps):
-        for t in range(time_steps):
-            communication()
-            for B in blocks:
-                B['boundary']()
-            for B in blocks:
-                sweep(B)
-        full_pdf_field = field.toArray(field.gather(blocks, 'pdfs', makeSlice[:, :, :]), withGhostLayers=False)
-        density = field.toArray(field.gather(blocks, 'rho', makeSlice[:, :, :]), withGhostLayers=False)
-        velocity = field.toArray(field.gather(blocks, 'vel', makeSlice[:, :, :]), withGhostLayers=False)
-        full_pdf_field = full_pdf_field[:, :, 0, :]
-        density = density[:, :, 0, 0]
-        velocity = velocity[:, :, 0, :2]
-        return full_pdf_field, density, velocity
-
-    return time_loop
-
-
-def create_lid_driven_cavity(domain_size, lid_velocity=0.005, **kwargs):
-    from waLBerla import lbm
-
-    d = len(domain_size)
-
-    if 'stencil' not in kwargs:
-        kwargs['stencil'] = 'D2Q9' if d == 2 else 'D3Q27'
-
-    if d == 2:
-        domain_size = (domain_size[0], domain_size[1], 1)
-
-    lattice_model = create_walberla_lattice_model(**kwargs)
-    blocks = createUniformBlockGrid(cells=domain_size, periodic=(1, 1, 1))
-
-    # Adding fields
-    lbm.addPdfFieldToStorage(blocks, "pdfs", lattice_model, velocityAdaptor="vel", densityAdaptor="rho",
-                             initialDensity=1.0)
-    field.addFlagFieldToStorage(blocks, 'flags')
-    lbm.addBoundaryHandlingToStorage(blocks, 'boundary', 'pdfs', 'flags')
-
-    # Communication
-    communication = createUniformBufferedScheme(blocks, lattice_model.communicationStencilName)
-    communication.addDataToCommunicate(field.createPackInfo(blocks, 'pdfs'))
-
-    # Setting boundaries
-    for block in blocks:
-        b = block['boundary']
-        if block.atDomainMaxBorder[1]:  # N
-            b.forceBoundary('UBB', makeSlice[:, -1, :, 'g'], {'x': lid_velocity})
-        if block.atDomainMinBorder[1]:  # S
-            b.forceBoundary('NoSlip', makeSlice[:, 0, :, 'g'])
-        if block.atDomainMinBorder[0]:  # W
-            b.forceBoundary('NoSlip', makeSlice[0, :, :, 'g'])
-        if block.atDomainMaxBorder[0]:  # E
-            b.forceBoundary('NoSlip', makeSlice[-1, :, :, 'g'])
-        if block.atDomainMinBorder[2] and d == 3:  # T
-            b.forceBoundary('NoSlip', makeSlice[:, :, 0, 'g'])
-        if block.atDomainMaxBorder[2] and d == 3:  # B
-            b.forceBoundary('NoSlip', makeSlice[:, :, -1, 'g'])
-
-        b.fillWithDomain()
-
-    sweep = lbm.makeCellwiseSweep(blocks, "pdfs", flagFieldID='flags', flagList=['fluid']).streamCollide
-
-    def time_loop(time_steps):
-        for t in range(time_steps):
-            communication()
-            for B in blocks:
-                B['boundary']()
-            for B in blocks:
-                sweep(B)
-        full_pdf_field = field.toArray(field.gather(blocks, 'pdfs', makeSlice[:, :, :]), withGhostLayers=False)
-        density = field.toArray(field.gather(blocks, 'rho', makeSlice[:, :, :]), withGhostLayers=False)
-        velocity = field.toArray(field.gather(blocks, 'vel', makeSlice[:, :, :]), withGhostLayers=False)
-        if d == 2:
-            full_pdf_field = full_pdf_field[:, :, 0, :]
-            density = density[:, :, 0, 0]
-            velocity = velocity[:, :, 0, :2]
-        elif d == 3:
-            density = density[:, :, :, 0]
-
-        return full_pdf_field, density, velocity
-
-    return time_loop
--- a/noxfile.py
+++ b/noxfile.py
+from __future__ import annotations
+from typing import Sequence
+from argparse import ArgumentParser
+
+import os
+import nox
+import subprocess
+import re
+
+nox.options.sessions = ["lint", "typecheck"]
+
+
+def get_cuda_version(session: nox.Session) -> None | tuple[int, ...]:
+    """Detect the installed CUDA version by querying ``nvcc --version``.
+
+    Args:
+        session: Active nox session; only used to emit a warning when ``nvcc``
+            could be started but its output holds no recognisable release number.
+
+    Returns:
+        ``(major, minor)`` as a tuple of ints, or ``None`` when ``nvcc`` cannot
+        be found or its version cannot be determined.
+    """
+    try:
+        query_result = subprocess.run(["nvcc", "--version"], capture_output=True)
+    except FileNotFoundError:
+        # No nvcc executable: there is no CUDA toolkit to report, stay silent.
+        return None
+
+    # Decode the captured bytes explicitly rather than matching against their
+    # ``repr``; undecodable bytes are replaced so decoding can never raise.
+    output = query_result.stdout.decode(errors="replace")
+
+    # Only the first "release X.Y" occurrence is considered.
+    match = re.search(r"release (\d+)\.(\d+)", output)
+    if match is not None:
+        return tuple(int(group) for group in match.groups())
+
+    session.warn("nvcc was found, but I am unable to determine the CUDA version.")
+    return None
+
+
+def install_cupy(
+    session: nox.Session, cupy_version: str | None, skip_if_no_cuda: bool = False
+) -> None:
+    """Install the cupy wheel matching the detected CUDA major version.
+
+    Args:
+        session: Active nox session used for installing, warning and skipping.
+        cupy_version: Version pinned with ``==`` in the package requirement.
+            ``None`` means "no cupy requested" and makes this a no-op.
+        skip_if_no_cuda: If true, the session is skipped when no CUDA 11 or 12
+            installation is detected; otherwise a warning is emitted and the
+            session continues without cupy.
+    """
+    if cupy_version is None:
+        return
+
+    cuda_version = get_cuda_version(session)
+    if cuda_version is None or cuda_version[0] not in (11, 12):
+        if skip_if_no_cuda:
+            # Per the nox API, session.skip() ends the session immediately.
+            session.skip(
+                "No compatible installation of CUDA found - Need either CUDA 11 or 12"
+            )
+        else:
+            session.warn(
+                "Running without cupy: no compatible installation of CUDA found. Need either CUDA 11 or 12."
+            )
+            return
+
+    # Package name is built as cupy-cuda<major>x, e.g. cupy-cuda12x.
+    cuda_major = cuda_version[0]
+    cupy_package = f"cupy-cuda{cuda_major}x=={cupy_version}"
+    session.install(cupy_package)
+
+
+def check_external_doc_dependencies(session: nox.Session):
+    """Report a session error if the graphviz ``dot`` command cannot be started.
+
+    Only the ability to launch ``dot`` is checked; its output and exit status
+    are ignored.
+    """
+    try:
+        subprocess.run(["dot", "--version"], capture_output=True)
+    except FileNotFoundError:
+        session.error(
+            "Unable to build documentation: "
+            "Command `dot` from the `graphviz` package (https://www.graphviz.org/) is not available"
+        )
+
+
+def editable_install(session: nox.Session, opts: Sequence[str] = ()):
+    """Install the project from the current directory in editable mode.
+
+    Args:
+        session: Active nox session performing the installation.
+        opts: Names of optional-dependency groups (extras) to request; they are
+            joined as ``.[a,b,...]``. An empty sequence installs plain ``.``.
+    """
+    extras = f"[{','.join(opts)}]" if opts else ""
+    session.install("-e", f".{extras}")
+
+
+def install_pystencils_master(session: nox.Session):
+    """Install pystencils from the ``master`` branch of its git repository."""
+    requirement = "git+https://i10git.cs.fau.de/pycodegen/pystencils.git@master"
+    session.install(requirement)
+
+
+def install_sympy_master(session: nox.Session):
+    """Install SymPy from the ``master`` branch of its git repository.
+
+    ``--upgrade`` is passed so that an already installed SymPy is replaced.
+    """
+    requirement = "git+https://github.com/sympy/sympy.git@master"
+    session.install("--upgrade", requirement)
+
+
+@nox.session(python="3.10", tags=["qa", "code-quality"])
+def lint(session: nox.Session):
+    """Lint code using flake8"""
+
+    # Only flake8 is installed; the package itself is not installed for linting.
+    session.install("flake8")
+    # Only the library sources under src/lbmpy are checked.
+    session.run("flake8", "src/lbmpy")
+
+
+@nox.session(python="3.10", tags=["qa", "code-quality"])
+def typecheck(session: nox.Session):
+    """Run MyPy for static type checking"""
+    # Install lbmpy itself (editable, without extras) before installing mypy.
+    editable_install(session)
+    session.install("mypy")
+    # Only the library sources under src/lbmpy are type-checked.
+    session.run("mypy", "src/lbmpy")
+
+
+def run_testsuite(session: nox.Session, coverage: bool = True):
+    """Run pytest in parallel, skipping tests marked ``longrun``.
+
+    An HTML report is written to ``test-report/index.html`` and a JUnit XML
+    report to ``report.xml``.
+
+    Args:
+        session: Active nox session in which pytest is executed.
+        coverage: If true, coverage is collected for the current directory and
+            HTML and XML coverage reports are produced after the test run.
+    """
+    # os.cpu_count() may return None when the count cannot be determined;
+    # fall back to a single worker instead of passing the string "None" to -n.
+    num_cores = os.cpu_count() or 1
+
+    args = [
+        "pytest",
+        "-v",
+        "-n",
+        str(num_cores),
+        "-m",
+        "not longrun",
+        "--html",
+        "test-report/index.html",
+        "--junitxml=report.xml",
+    ]
+
+    if coverage:
+        args += [
+            "--cov-report=term",
+            "--cov=.",
+        ]
+
+    session.run(*args)
+
+    if coverage:
+        session.run("coverage", "html")
+        session.run("coverage", "xml")
+
+
+@nox.session(python=["3.10", "3.11", "3.12", "3.13"])
+def testsuite_cpu(session: nox.Session):
+    # CPU-only run: no cupy is installed and coverage collection is disabled.
+    install_pystencils_master(session)
+    extras = ["alltrafos", "use_cython", "interactive", "tests"]
+    editable_install(session, extras)
+    run_testsuite(session, coverage=False)
+
+
+@nox.session(python=["3.10", "3.11", "3.12", "3.13"])
+@nox.parametrize("cupy_version", ["12", "13"], ids=["cupy12", "cupy13"])
+def testsuite_gpu(session: nox.Session, cupy_version: str | None):
+    # cupy goes in first; install_cupy skips the session when no CUDA 11/12 is found.
+    install_cupy(session, cupy_version, skip_if_no_cuda=True)
+    install_pystencils_master(session)
+    extras = ["alltrafos", "use_cython", "interactive", "tests"]
+    editable_install(session, extras)
+    # Coverage is left at run_testsuite's default (enabled).
+    run_testsuite(session)
+
+
+@nox.session(python="3.10", tags=["test"])
+@nox.parametrize("cupy_version", [None, "12", "13"], ids=["cpu", "cupy12", "cupy13"])
+def testsuite_pystencils2(session: nox.Session, cupy_version: str | None):
+    """Run the test suite against the pystencils ``v2.0-dev`` branch.
+
+    Args:
+        session: Active nox session.
+        cupy_version: cupy version to install first, or ``None`` for the
+            CPU-only variant, which installs no cupy at all.
+    """
+    # Decorator order matches testsuite_gpu and nox's documented usage:
+    # @nox.session outermost, @nox.parametrize directly on the function.
+    if cupy_version is not None:
+        install_cupy(session, cupy_version, skip_if_no_cuda=True)
+
+    session.install(
+        "git+https://i10git.cs.fau.de/pycodegen/pystencils.git@v2.0-dev"
+    )
+    editable_install(session, ["alltrafos", "use_cython", "interactive", "tests"])
+
+    run_testsuite(session)
+
+
+@nox.session
+def quicktest(session: nox.Session):
+    # Session-specific options are read from the positional arguments handed to nox.
+    cli = ArgumentParser()
+    cli.add_argument(
+        "--sympy-master", action="store_true", help="Use latest SymPy master revision"
+    )
+    use_sympy_master = cli.parse_args(session.posargs).sympy_master
+
+    install_pystencils_master(session)
+    editable_install(session)
+
+    # SymPy master is installed last, after lbmpy and its dependencies.
+    if use_sympy_master:
+        install_sympy_master(session)
+
+    session.run("python", "quicktest.py")
--- a/pyproject.toml
+++ b/pyproject.toml
+[project]
+name = "lbmpy"
+description = "Code Generation for Lattice Boltzmann Methods"
+dynamic = ["version"]
+readme = "README.md"
+authors = [
+    { name = "Martin Bauer" },
+    { name = "Markus Holzer" },
+    { name = "Frederik Hennig" },
+    { email = "cs10-codegen@fau.de" },
+]
+license = { file = "COPYING.txt" }
+requires-python = ">=3.10"
+dependencies = ["pystencils>=1.3", "sympy>=1.12", "numpy>=1.8.0", "appdirs", "joblib", "packaging"]
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Framework :: Jupyter",
+    "Topic :: Software Development :: Code Generators",
+    "Topic :: Scientific/Engineering :: Physics",
+    "Intended Audience :: Developers",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)",
+]
+
+[project.urls]
+"Bug Tracker" = "https://i10git.cs.fau.de/pycodegen/lbmpy/-/issues"
+"Documentation" = "https://pycodegen.pages.i10git.cs.fau.de/lbmpy/"
+"Source Code" = "https://i10git.cs.fau.de/pycodegen/lbmpy"
+
+[project.optional-dependencies]
+gpu = ['cupy']
+alltrafos = ['islpy', 'py-cpuinfo']
+bench_db = ['blitzdb', 'pymongo', 'pandas']
+interactive = [
+    'matplotlib',
+    'ipy_table',
+    'imageio',
+    'jupyter',
+    'pyevtk',
+    'rich',
+    'graphviz',
+    'scipy',
+    'scikit-image'
+]
+use_cython = [
+    'Cython'
+]
+doc = [
+    'sphinx',
+    'sphinx_rtd_theme',
+    'nbsphinx',
+    'sphinxcontrib-bibtex',
+    'sphinx_autodoc_typehints',
+    'pandoc',
+]
+tests = [
+    'pytest',
+    'pytest-cov',
+    'pytest-html',
+    'ansi2html',
+    'pytest-xdist',
+    'flake8',
+    'nbformat',
+    'nbconvert',
+    'ipython',
+    'randomgen>=1.18',
+]
+
+[build-system]
+requires = [
+    "setuptools>=69",
+    "versioneer[toml]>=0.29",
+]
+build-backend = "setuptools.build_meta"
+
+[tool.setuptools.package-data]
+
+[tool.setuptools.packages.find]
+where = ["src"]
+include = ["lbmpy", "lbmpy.*"]
+namespaces = false
+
+[tool.versioneer]
+# See the docstring in versioneer.py for instructions. Note that you must
+# re-run 'versioneer.py setup' after changing this section, and commit the
+# resulting files.
+VCS = "git"
+style = "pep440"
+versionfile_source = "src/lbmpy/_version.py"
+versionfile_build = "lbmpy/_version.py"
+tag_prefix = "release/"
+parentdir_prefix = "lbmpy-"
--- a/pytest.ini
+++ b/pytest.ini
 [pytest]
+testpaths = src tests doc/notebooks
+pythonpath = src
 python_files = test_*.py *_test.py scenario_*.py
 norecursedirs = *.egg-info .git .cache .ipynb_checkpoints htmlcov
-addopts = --doctest-modules --durations=20  --cov-config pytest.ini
+addopts = 
+       --doctest-modules --durations=20
+       --cov-config pytest.ini
+       --ignore=src/lbmpy/custom_code_nodes.py
+       --ignore=src/lbmpy/lookup_tables.py
+       --ignore=src/lbmpy/phasefield_allen_cahn/contact_angle.py
 markers =
       longrun: tests only run at night since they have large execution time
-       notebook: jupyter notebooks
+       notebook: mark for notebooks
+# these warnings all come from third party libraries.
+filterwarnings =
+       ignore:the imp module is deprecated in favour of importlib:DeprecationWarning
+       ignore:'contextfilter' is renamed to 'pass_context':DeprecationWarning
+       ignore:Persisting input arguments took:UserWarning


 [run]
 branch = True
-source = lbmpy
-         lbmpy_tests
+source = src/lbmpy
+         tests

 omit = doc/*
-       lbmpy_tests/*
+       tests/*
       setup.py
       conftest.py
       versioneer.py
-       lbmpy/_version.py
+       quicktest.py
+       noxfile.py
+       src/lbmpy/_version.py
+       src/lbmpy/_compat.py
+       venv/

 [report]
 exclude_lines =
@@ -25,10 +41,12 @@ exclude_lines =
       pragma: no cover

       def __repr__
+       def _repr_html_

       # Don't complain if tests don't hit defensive assertion code:
       raise AssertionError
       raise NotImplementedError
+       NotImplementedError()
       #raise ValueError

       # Don't complain if non-runnable code isn't run:
@@ -37,7 +55,7 @@ exclude_lines =
       if __name__ == .__main__.:

 skip_covered = True
-fail_under = 89
+fail_under = 87

 [html]
 directory = coverage_report
--- a/quicktest.py
+++ b/quicktest.py
+#!/usr/bin/env python3
+
+from contextlib import redirect_stdout
+import io
+from tests.test_quicktests import (
+    test_poiseuille_channel_quicktest,
+    test_entropic_methods,
+    test_cumulant_ldc
+)
+
+quick_tests = [
+    test_poiseuille_channel_quicktest,
+    test_entropic_methods,
+    test_cumulant_ldc,
+]
+
+if __name__ == "__main__":
+    print("Running lbmpy quicktests")
+    for quick_test in quick_tests:
+        print(f"   -> {quick_test.__name__}")
+        # Discard anything the test prints; only the progress lines above are shown.
+        with redirect_stdout(io.StringIO()):
+            quick_test()
--- a/setup.cfg
+++ b/setup.cfg
-# See the docstring in versioneer.py for instructions. Note that you must
-# re-run 'versioneer.py setup' after changing this section, and commit the
-# resulting files.
-
-[versioneer]
-VCS = git
-style = pep440
-versionfile_source = lbmpy/_version.py
-versionfile_build = lbmpy/_version.py
-tag_prefix = release/
-parentdir_prefix = lbmpy-
\ No newline at end of file
--- a/setup.py
+++ b/setup.py
-import os
-import sys
-import io
-from setuptools import setup, find_packages
-import distutils
-from contextlib import redirect_stdout
-from importlib import import_module
+from setuptools import setup, __version__ as setuptools_version

-import versioneer
-
-try:
-    import cython  # noqa
-    USE_CYTHON = True
-except ImportError:
-    USE_CYTHON = False
-
-quick_tests = [
-    'test_serial_scenarios.test_ldc_mrt',
-    'test_serial_scenarios.test_channel_srt',
-]
-
-class SimpleTestRunner(distutils.cmd.Command):
-    """A custom command to run selected tests"""
-
-    description = 'run some quick tests'
-    user_options = []
-
-    @staticmethod
-    def _run_tests_in_module(test):
-        """Short test runner function - to work also if py.test is not installed."""
-        test = 'lbmpy_tests.' + test
-        mod, function_name = test.rsplit('.', 1)
-        if isinstance(mod, str):
-            mod = import_module(mod)
-
-        func = getattr(mod, function_name)
-        with redirect_stdout(io.StringIO()):
-            func()
-
-    def initialize_options(self):
-        pass
-
-    def finalize_options(self):
-        pass
-
-    def run(self):
-        """Run command."""
-        for test in quick_tests:
-            self._run_tests_in_module(test)
-
-
-def readme():
-    with open('README.md') as f:
-        return f.read()
+if int(setuptools_version.split('.')[0]) < 61:
+    raise Exception(
+        "[ERROR] lbmpy requires at least setuptools version 61 to install.\n"
+        "If this error occurs during an installation via pip, it is likely that there is a conflict between "
+        "versions of setuptools installed by pip and the system package manager. "
+        "In this case, it is recommended to install lbmpy into a virtual environment instead."
+    )

+import versioneer

-def cython_extensions(*extensions):
-    from distutils.extension import Extension
-    if USE_CYTHON:
-        ext = '.pyx'
-        result = [Extension(e, [os.path.join(*e.split(".")) + ext]) for e in extensions]
-        from Cython.Build import cythonize
-        result = cythonize(result, language_level=3)
-        return result
-    elif all([os.path.exists(os.path.join(*e.split(".")) + '.c') for e in extensions]):
-        ext = '.c'
-        result = [Extension(e, [os.path.join(*e.split(".")) + ext]) for e in extensions]
-        return result
-    else:
-        return None

 def get_cmdclass():
-    cmdclass={"quicktest": SimpleTestRunner}
-    cmdclass.update(versioneer.get_cmdclass())
-    return cmdclass
+    return versioneer.get_cmdclass()


-setup(name='lbmpy',
-      version=versioneer.get_version(),
-      description='Code Generation for Lattice Boltzmann Methods',
-      long_description=readme(),
-      long_description_content_type="text/markdown",
-      author='Martin Bauer',
-      license='AGPLv3',
-      author_email='martin.bauer@fau.de',
-      url='https://i10git.cs.fau.de/pycodegen/lbmpy/',
-      packages=['lbmpy'] + ['lbmpy.' + s for s in find_packages('lbmpy')],
-      install_requires=['pystencils', 'sympy>=1.2', 'numpy>=1.11.0'],
-      package_data={'lbmpy': ['phasefield/simplex_projection.pyx',
-                              'phasefield/simplex_projection.c']},
-      ext_modules=cython_extensions("lbmpy.phasefield.simplex_projection"),
-      classifiers=[
-          'Development Status :: 4 - Beta',
-          'Framework :: Jupyter',
-          'Topic :: Software Development :: Code Generators',
-          'Topic :: Scientific/Engineering :: Physics',
-          'Intended Audience :: Developers',
-          'Intended Audience :: Science/Research',
-          'License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)',
-      ],
-      python_requires=">=3.6",
-      extras_require={
-          'gpu': ['pycuda'],
-          'opencl': ['pyopencl'],
-          'alltrafos': ['islpy', 'py-cpuinfo'],
-          'interactive': ['scipy', 'scikit-image', 'cython', 'matplotlib',
-                          'ipy_table', 'imageio', 'jupyter', 'pyevtk'],
-          'doc': ['sphinx', 'sphinx_rtd_theme', 'nbsphinx',
-                  'sphinxcontrib-bibtex', 'sphinx_autodoc_typehints', 'pandoc'],
-          'phasefield': ['Cython']
-      },
-      cmdclass=get_cmdclass()
-      )
+setup(
+    version=versioneer.get_version(),
+    cmdclass=get_cmdclass(),
+)
--- a/src/lbmpy/__init__.py
+++ b/src/lbmpy/__init__.py
+from .creationfunctions import (
+    create_lb_ast,
+    create_lb_collision_rule,
+    create_lb_function,
+    create_lb_method,
+    create_lb_update_rule,
+    LBMConfig,
+    LBMOptimisation,
+)
+from .enums import Stencil, Method, ForceModel, CollisionSpace, SubgridScaleModel
+from .lbstep import LatticeBoltzmannStep
+from .macroscopic_value_kernels import (
+    pdf_initialization_assignments,
+    macroscopic_values_getter,
+    strain_rate_tensor_getter,
+    compile_macroscopic_values_getter,
+    compile_macroscopic_values_setter,
+    create_advanced_velocity_setter_collision_rule,
+)
+from .maxwellian_equilibrium import get_weights
+from .relaxationrates import (
+    relaxation_rate_from_lattice_viscosity,
+    lattice_viscosity_from_relaxation_rate,
+    relaxation_rate_from_magic_number,
+)
+from .scenarios import create_lid_driven_cavity, create_fully_periodic_flow
+from .stencils import LBStencil
+
+
+__all__ = [
+    "create_lb_ast",
+    "create_lb_collision_rule",
+    "create_lb_function",
+    "create_lb_method",
+    "create_lb_update_rule",
+    "LBMConfig",
+    "LBMOptimisation",
+    "Stencil",
+    "Method",
+    "ForceModel",
+    "CollisionSpace",
+    "SubgridScaleModel",
+    "LatticeBoltzmannStep",
+    "pdf_initialization_assignments",
+    "macroscopic_values_getter",
+    "strain_rate_tensor_getter",
+    "compile_macroscopic_values_getter",
+    "compile_macroscopic_values_setter",
+    "create_advanced_velocity_setter_collision_rule",
+    "get_weights",
+    "relaxation_rate_from_lattice_viscosity",
+    "lattice_viscosity_from_relaxation_rate",
+    "relaxation_rate_from_magic_number",
+    "create_lid_driven_cavity",
+    "create_fully_periodic_flow",
+    "LBStencil",
+]
+
+
+from . import _version
+__version__ = _version.get_versions()['version']
--- a/src/lbmpy/_compat.py
+++ b/src/lbmpy/_compat.py
+from pystencils import __version__ as ps_version
+
+#   Determine if we're running pystencils 1.x or 2.x
+version_tokes = ps_version.split(".")
+
+PYSTENCILS_VERSION_MAJOR = int(version_tokes[0])
+IS_PYSTENCILS_2 = PYSTENCILS_VERSION_MAJOR == 2
+
+if IS_PYSTENCILS_2:
+    from pystencils.defaults import DEFAULTS
+
+    def get_loop_counter_symbol(coord: int):
+        """Return the default spatial counter of pystencils 2.x for axis ``coord``."""
+        return DEFAULTS.spatial_counters[coord]
+
+    def get_supported_instruction_sets():
+        """List the available vector CPU targets as lower-case instruction-set names.
+
+        Each name is the last underscore-separated token of the target's name.
+        """
+        from pystencils import Target
+
+        return [
+            target.name.split("_")[-1].lower()
+            for target in Target.available_vector_cpu_targets()
+        ]
+
+else:
+    # pystencils 1.x: re-export its own implementation under the same name.
+    from pystencils.backends.simd_instruction_sets import (
+        get_supported_instruction_sets as get_supported_instruction_sets_,
+    )
+
+    get_supported_instruction_sets = get_supported_instruction_sets_
+
+    def get_loop_counter_symbol(coord: int):
+        """Return the loop counter symbol of pystencils 1.x for axis ``coord``."""
+        from pystencils.astnodes import LoopOverCoordinate
+
+        return LoopOverCoordinate.get_loop_counter_symbol(coord)
+
+
+def import_guard_pystencils1(feature):
+    """Guard a feature that is only available with pystencils 1.x.
+
+    Args:
+        feature: Description of the feature; interpolated into the error message.
+
+    Returns:
+        ``True`` when not running pystencils 2.x.
+
+    Raises:
+        ImportError: When running pystencils 2.x.
+    """
+    if not IS_PYSTENCILS_2:
+        return True
+    raise ImportError(
+        f"The following feature is not yet available when running pystencils 2.x: {feature}"
+    )
--- a/lbmpy/_version.py
+++ b/lbmpy/_version.py
@@ -5,8 +5,9 @@
 # directories (produced by setup.py build) will contain a much shorter file
 # that just contains the computed version number.

-# This file is released into the public domain. Generated by
-# versioneer-0.19 (https://github.com/python-versioneer/python-versioneer)
+# This file is released into the public domain.
+# Generated by versioneer-0.29
+# https://github.com/python-versioneer/python-versioneer

 """Git implementation of _version.py."""

@@ -15,9 +16,11 @@ import os
 import re
 import subprocess
 import sys
+from typing import Any, Callable, Dict, List, Optional, Tuple
+import functools


-def get_keywords():
+def get_keywords() -> Dict[str, str]:
    """Get the keywords needed to look up the version information."""
    # these strings will be replaced by git during git-archive.
    # setup.py/versioneer.py will grep for the variable names, so they must
@@ -33,8 +36,15 @@ def get_keywords():
 class VersioneerConfig:
    """Container for Versioneer configuration parameters."""

+    VCS: str
+    style: str
+    tag_prefix: str
+    parentdir_prefix: str
+    versionfile_source: str
+    verbose: bool

-def get_config():
+
+def get_config() -> VersioneerConfig:
    """Create, populate and return the VersioneerConfig() object."""
    # these strings are filled in when 'setup.py versioneer' creates
    # _version.py
@@ -43,7 +53,7 @@ def get_config():
    cfg.style = "pep440"
    cfg.tag_prefix = "release/"
    cfg.parentdir_prefix = "lbmpy-"
-    cfg.versionfile_source = "lbmpy/_version.py"
+    cfg.versionfile_source = "src/lbmpy/_version.py"
    cfg.verbose = False
    return cfg

@@ -52,13 +62,13 @@ class NotThisMethod(Exception):
    """Exception raised if a method is not valid for the current scenario."""


-LONG_VERSION_PY = {}
-HANDLERS = {}
+LONG_VERSION_PY: Dict[str, str] = {}
+HANDLERS: Dict[str, Dict[str, Callable]] = {}


-def register_vcs_handler(vcs, method):  # decorator
+def register_vcs_handler(vcs: str, method: str) -> Callable:  # decorator
    """Create decorator to mark a method as the handler of a VCS."""
-    def decorate(f):
+    def decorate(f: Callable) -> Callable:
        """Store f in HANDLERS[vcs][method]."""
        if vcs not in HANDLERS:
            HANDLERS[vcs] = {}
@@ -67,22 +77,35 @@ def register_vcs_handler(vcs, method):  # decorator
    return decorate


-def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
-                env=None):
+def run_command(
+    commands: List[str],
+    args: List[str],
+    cwd: Optional[str] = None,
+    verbose: bool = False,
+    hide_stderr: bool = False,
+    env: Optional[Dict[str, str]] = None,
+) -> Tuple[Optional[str], Optional[int]]:
    """Call the given command(s)."""
    assert isinstance(commands, list)
-    p = None
-    for c in commands:
+    process = None
+
+    popen_kwargs: Dict[str, Any] = {}
+    if sys.platform == "win32":
+        # This hides the console window if pythonw.exe is used
+        startupinfo = subprocess.STARTUPINFO()
+        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
+        popen_kwargs["startupinfo"] = startupinfo
+
+    for command in commands:
        try:
-            dispcmd = str([c] + args)
+            dispcmd = str([command] + args)
            # remember shell=False, so use git.cmd on windows, not just git
-            p = subprocess.Popen([c] + args, cwd=cwd, env=env,
-                                 stdout=subprocess.PIPE,
-                                 stderr=(subprocess.PIPE if hide_stderr
-                                         else None))
+            process = subprocess.Popen([command] + args, cwd=cwd, env=env,
+                                       stdout=subprocess.PIPE,
+                                       stderr=(subprocess.PIPE if hide_stderr
+                                               else None), **popen_kwargs)
            break
-        except EnvironmentError:
-            e = sys.exc_info()[1]
+        except OSError as e:
            if e.errno == errno.ENOENT:
                continue
            if verbose:
@@ -93,16 +116,20 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
        if verbose:
            print("unable to find command, tried %s" % (commands,))
        return None, None
-    stdout = p.communicate()[0].strip().decode()
-    if p.returncode != 0:
+    stdout = process.communicate()[0].strip().decode()
+    if process.returncode != 0:
        if verbose:
            print("unable to run %s (error)" % dispcmd)
            print("stdout was %s" % stdout)
-        return None, p.returncode
-    return stdout, p.returncode
+        return None, process.returncode
+    return stdout, process.returncode


-def versions_from_parentdir(parentdir_prefix, root, verbose):
+def versions_from_parentdir(
+    parentdir_prefix: str,
+    root: str,
+    verbose: bool,
+) -> Dict[str, Any]:
    """Try to determine the version from the parent directory name.

    Source tarballs conventionally unpack into a directory that includes both
@@ -111,15 +138,14 @@ def versions_from_parentdir(parentdir_prefix, root, verbose):
    """
    rootdirs = []

-    for i in range(3):
+    for _ in range(3):
        dirname = os.path.basename(root)
        if dirname.startswith(parentdir_prefix):
            return {"version": dirname[len(parentdir_prefix):],
                    "full-revisionid": None,
                    "dirty": False, "error": None, "date": None}
-        else:
-            rootdirs.append(root)
-            root = os.path.dirname(root)  # up a level
+        rootdirs.append(root)
+        root = os.path.dirname(root)  # up a level

    if verbose:
        print("Tried directories %s but none started with prefix %s" %
@@ -128,39 +154,42 @@ def versions_from_parentdir(parentdir_prefix, root, verbose):


 @register_vcs_handler("git", "get_keywords")
-def git_get_keywords(versionfile_abs):
+def git_get_keywords(versionfile_abs: str) -> Dict[str, str]:
    """Extract version information from the given file."""
    # the code embedded in _version.py can just fetch the value of these
    # keywords. When used from setup.py, we don't want to import _version.py,
    # so we do it with a regexp instead. This function is not used from
    # _version.py.
-    keywords = {}
+    keywords: Dict[str, str] = {}
    try:
-        f = open(versionfile_abs, "r")
-        for line in f.readlines():
-            if line.strip().startswith("git_refnames ="):
-                mo = re.search(r'=\s*"(.*)"', line)
-                if mo:
-                    keywords["refnames"] = mo.group(1)
-            if line.strip().startswith("git_full ="):
-                mo = re.search(r'=\s*"(.*)"', line)
-                if mo:
-                    keywords["full"] = mo.group(1)
-            if line.strip().startswith("git_date ="):
-                mo = re.search(r'=\s*"(.*)"', line)
-                if mo:
-                    keywords["date"] = mo.group(1)
-        f.close()
-    except EnvironmentError:
+        with open(versionfile_abs, "r") as fobj:
+            for line in fobj:
+                if line.strip().startswith("git_refnames ="):
+                    mo = re.search(r'=\s*"(.*)"', line)
+                    if mo:
+                        keywords["refnames"] = mo.group(1)
+                if line.strip().startswith("git_full ="):
+                    mo = re.search(r'=\s*"(.*)"', line)
+                    if mo:
+                        keywords["full"] = mo.group(1)
+                if line.strip().startswith("git_date ="):
+                    mo = re.search(r'=\s*"(.*)"', line)
+                    if mo:
+                        keywords["date"] = mo.group(1)
+    except OSError:
        pass
    return keywords


 @register_vcs_handler("git", "keywords")
-def git_versions_from_keywords(keywords, tag_prefix, verbose):
+def git_versions_from_keywords(
+    keywords: Dict[str, str],
+    tag_prefix: str,
+    verbose: bool,
+) -> Dict[str, Any]:
    """Get version information from git keywords."""
-    if not keywords:
-        raise NotThisMethod("no keywords at all, weird")
+    if "refnames" not in keywords:
+        raise NotThisMethod("Short version file found")
    date = keywords.get("date")
    if date is not None:
        # Use only the last line.  Previous lines may contain GPG signature
@@ -179,11 +208,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
        if verbose:
            print("keywords are unexpanded, not using")
        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
-    refs = set([r.strip() for r in refnames.strip("()").split(",")])
+    refs = {r.strip() for r in refnames.strip("()").split(",")}
    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
    TAG = "tag: "
-    tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
+    tags = {r[len(TAG):] for r in refs if r.startswith(TAG)}
    if not tags:
        # Either we're using git < 1.8.3, or there really are no tags. We use
        # a heuristic: assume all version tags have a digit. The old git %d
@@ -192,7 +221,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
        # between branches and tags. By ignoring refnames without digits, we
        # filter out many common branch names like "release" and
        # "stabilization", as well as "HEAD" and "master".
-        tags = set([r for r in refs if re.search(r'\d', r)])
+        tags = {r for r in refs if re.search(r'\d', r)}
        if verbose:
            print("discarding '%s', no digits" % ",".join(refs - tags))
    if verbose:
@@ -201,6 +230,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
        # sorting will prefer e.g. "2.0" over "2.0rc1"
        if ref.startswith(tag_prefix):
            r = ref[len(tag_prefix):]
+            # Filter out refs that exactly match prefix or that don't start
+            # with a number once the prefix is stripped (mostly a concern
+            # when prefix is '')
+            if not re.match(r'\d', r):
+                continue
            if verbose:
                print("picking %s" % r)
            return {"version": r,
@@ -216,7 +250,12 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):


 @register_vcs_handler("git", "pieces_from_vcs")
-def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
+def git_pieces_from_vcs(
+    tag_prefix: str,
+    root: str,
+    verbose: bool,
+    runner: Callable = run_command
+) -> Dict[str, Any]:
    """Get version from 'git describe' in the root of the source tree.

    This only gets called if the git-archive 'subst' keywords were *not*
@@ -227,8 +266,15 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
    if sys.platform == "win32":
        GITS = ["git.cmd", "git.exe"]

-    out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root,
-                          hide_stderr=True)
+    # GIT_DIR can interfere with correct operation of Versioneer.
+    # It may be intended to be passed to the Versioneer-versioned project,
+    # but that should not change where we get our version from.
+    env = os.environ.copy()
+    env.pop("GIT_DIR", None)
+    runner = functools.partial(runner, env=env)
+
+    _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root,
+                   hide_stderr=not verbose)
    if rc != 0:
        if verbose:
            print("Directory %s not under git control" % root)
@@ -236,24 +282,57 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):

    # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
    # if there isn't one, this yields HEX[-dirty] (no NUM)
-    describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty",
-                                          "--always", "--long",
-                                          "--match", "%s*" % tag_prefix],
-                                   cwd=root)
+    describe_out, rc = runner(GITS, [
+        "describe", "--tags", "--dirty", "--always", "--long",
+        "--match", f"{tag_prefix}[[:digit:]]*"
+    ], cwd=root)
    # --long was added in git-1.5.5
    if describe_out is None:
        raise NotThisMethod("'git describe' failed")
    describe_out = describe_out.strip()
-    full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
+    full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root)
    if full_out is None:
        raise NotThisMethod("'git rev-parse' failed")
    full_out = full_out.strip()

-    pieces = {}
+    pieces: Dict[str, Any] = {}
    pieces["long"] = full_out
    pieces["short"] = full_out[:7]  # maybe improved later
    pieces["error"] = None

+    branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"],
+                             cwd=root)
+    # --abbrev-ref was added in git-1.6.3
+    if rc != 0 or branch_name is None:
+        raise NotThisMethod("'git rev-parse --abbrev-ref' returned error")
+    branch_name = branch_name.strip()
+
+    if branch_name == "HEAD":
+        # If we aren't exactly on a branch, pick a branch which represents
+        # the current commit. If all else fails, we are on a branchless
+        # commit.
+        branches, rc = runner(GITS, ["branch", "--contains"], cwd=root)
+        # --contains was added in git-1.5.4
+        if rc != 0 or branches is None:
+            raise NotThisMethod("'git branch --contains' returned error")
+        branches = branches.split("\n")
+
+        # Remove the first line if we're running detached
+        if "(" in branches[0]:
+            branches.pop(0)
+
+        # Strip off the leading "* " from the list of branches.
+        branches = [branch[2:] for branch in branches]
+        if "master" in branches:
+            branch_name = "master"
+        elif not branches:
+            branch_name = None
+        else:
+            # Pick the first branch that is returned. Good or bad.
+            branch_name = branches[0]
+
+    pieces["branch"] = branch_name
+
    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
    # TAG might have hyphens.
    git_describe = describe_out
@@ -270,7 +349,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
        # TAG-NUM-gHEX
        mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
        if not mo:
-            # unparseable. Maybe git-describe is misbehaving?
+            # unparsable. Maybe git-describe is misbehaving?
            pieces["error"] = ("unable to parse git-describe output: '%s'"
                               % describe_out)
            return pieces
@@ -295,13 +374,11 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
    else:
        # HEX: no tags
        pieces["closest-tag"] = None
-        count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"],
-                                    cwd=root)
-        pieces["distance"] = int(count_out)  # total number of commits
+        out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root)
+        pieces["distance"] = len(out.split())  # total number of commits

    # commit date: see ISO-8601 comment in git_versions_from_keywords()
-    date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"],
-                       cwd=root)[0].strip()
+    date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip()
    # Use only the last line.  Previous lines may contain GPG signature
    # information.
    date = date.splitlines()[-1]
@@ -310,14 +387,14 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
    return pieces


-def plus_or_dot(pieces):
+def plus_or_dot(pieces: Dict[str, Any]) -> str:
    """Return a + if we don't already have one, else return a ."""
    if "+" in pieces.get("closest-tag", ""):
        return "."
    return "+"


-def render_pep440(pieces):
+def render_pep440(pieces: Dict[str, Any]) -> str:
    """Build up version string, with post-release "local version identifier".

    Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
@@ -342,23 +419,71 @@ def render_pep440(pieces):
    return rendered


-def render_pep440_pre(pieces):
-    """TAG[.post0.devDISTANCE] -- No -dirty.
+def render_pep440_branch(pieces: Dict[str, Any]) -> str:
+    """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] .
+
+    The ".dev0" means not master branch. Note that .dev0 sorts backwards
+    (a feature branch will appear "older" than the master branch).

    Exceptions:
-    1: no tags. 0.post0.devDISTANCE
+    1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty]
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            if pieces["branch"] != "master":
+                rendered += ".dev0"
+            rendered += plus_or_dot(pieces)
+            rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
+            if pieces["dirty"]:
+                rendered += ".dirty"
+    else:
+        # exception #1
+        rendered = "0"
+        if pieces["branch"] != "master":
+            rendered += ".dev0"
+        rendered += "+untagged.%d.g%s" % (pieces["distance"],
+                                          pieces["short"])
+        if pieces["dirty"]:
+            rendered += ".dirty"
+    return rendered
+
+
+def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]:
+    """Split pep440 version string at the post-release segment.
+
+    Returns the release segments before the post-release and the post-release
+    number: 0 for a bare ".post", None if no post-release segment is present.
+    """
+    vc = str.split(ver, ".post")
+    return vc[0], int(vc[1] or 0) if len(vc) == 2 else None
+
+
+def render_pep440_pre(pieces: Dict[str, Any]) -> str:
+    """TAG[.postN.devDISTANCE] -- No -dirty.
+
+    Exceptions:
+    1: no tags. 0.post0.devDISTANCE
+    """
+    if pieces["closest-tag"]:
        if pieces["distance"]:
-            rendered += ".post0.dev%d" % pieces["distance"]
+            # update the post release segment
+            tag_version, post_version = pep440_split_post(pieces["closest-tag"])
+            rendered = tag_version
+            if post_version is not None:
+                rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"])
+            else:
+                rendered += ".post0.dev%d" % (pieces["distance"])
+        else:
+            # no commits, use the tag as the version
+            rendered = pieces["closest-tag"]
    else:
        # exception #1
        rendered = "0.post0.dev%d" % pieces["distance"]
    return rendered


-def render_pep440_post(pieces):
+def render_pep440_post(pieces: Dict[str, Any]) -> str:
    """TAG[.postDISTANCE[.dev0]+gHEX] .

    The ".dev0" means dirty. Note that .dev0 sorts backwards
@@ -385,7 +510,36 @@ def render_pep440_post(pieces):
    return rendered


-def render_pep440_old(pieces):
+def render_pep440_post_branch(pieces: Dict[str, Any]) -> str:
+    """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] .
+
+    The ".dev0" is appended when pieces["branch"] is not "master".
+
+    Exceptions:
+    1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%d" % pieces["distance"]
+            if pieces["branch"] != "master":
+                rendered += ".dev0"
+            rendered += plus_or_dot(pieces)
+            rendered += "g%s" % pieces["short"]
+            if pieces["dirty"]:
+                rendered += ".dirty"
+    else:
+        # exception #1: no closest tag, so the version starts from "0"
+        rendered = "0.post%d" % pieces["distance"]
+        if pieces["branch"] != "master":
+            rendered += ".dev0"
+        rendered += "+g%s" % pieces["short"]
+        if pieces["dirty"]:
+            rendered += ".dirty"
+    return rendered
+
+
+def render_pep440_old(pieces: Dict[str, Any]) -> str:
    """TAG[.postDISTANCE[.dev0]] .

    The ".dev0" means dirty.
@@ -407,7 +561,7 @@ def render_pep440_old(pieces):
    return rendered


-def render_git_describe(pieces):
+def render_git_describe(pieces: Dict[str, Any]) -> str:
    """TAG[-DISTANCE-gHEX][-dirty].

    Like 'git describe --tags --dirty --always'.
@@ -427,7 +581,7 @@ def render_git_describe(pieces):
    return rendered


-def render_git_describe_long(pieces):
+def render_git_describe_long(pieces: Dict[str, Any]) -> str:
    """TAG-DISTANCE-gHEX[-dirty].

    Like 'git describe --tags --dirty --always -long'.
@@ -447,7 +601,7 @@ def render_git_describe_long(pieces):
    return rendered


-def render(pieces, style):
+def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]:
    """Render the given version pieces into the requested style."""
    if pieces["error"]:
        return {"version": "unknown",
@@ -461,10 +615,14 @@ def render(pieces, style):

    if style == "pep440":
        rendered = render_pep440(pieces)
+    elif style == "pep440-branch":
+        rendered = render_pep440_branch(pieces)
    elif style == "pep440-pre":
        rendered = render_pep440_pre(pieces)
    elif style == "pep440-post":
        rendered = render_pep440_post(pieces)
+    elif style == "pep440-post-branch":
+        rendered = render_pep440_post_branch(pieces)
    elif style == "pep440-old":
        rendered = render_pep440_old(pieces)
    elif style == "git-describe":
@@ -479,7 +637,7 @@ def render(pieces, style):
            "date": pieces.get("date")}


-def get_versions():
+def get_versions() -> Dict[str, Any]:
    """Get version information or return default if unable to do so."""
    # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
    # __file__, we can work backwards from there to the root. Some
@@ -500,7 +658,7 @@ def get_versions():
        # versionfile_source is the relative path from the top of the source
        # tree (where the .git directory might live) to this file. Invert
        # this to find the root from __file__.
-        for i in cfg.versionfile_source.split('/'):
+        for _ in cfg.versionfile_source.split('/'):
            root = os.path.dirname(root)
    except NameError:
        return {"version": "0+unknown", "full-revisionid": None,

--- a/lbmpy/advanced_streaming/__init__.py
+++ b/lbmpy/advanced_streaming/__init__.py
-from .indexing import BetweenTimestepsIndexing, NeighbourOffsetArrays
+from .indexing import BetweenTimestepsIndexing
 from .communication import get_communication_slices, LBMPeriodicityHandling
 from .utility import Timestep, get_accessor, is_inplace, get_timesteps, \
    numeric_index, numeric_offsets, inverse_dir_index, AccessPdfValues

-__all__ = ['BetweenTimestepsIndexing', 'NeighbourOffsetArrays',
+__all__ = ['BetweenTimestepsIndexing',
           'get_communication_slices', 'LBMPeriodicityHandling',
           'Timestep', 'get_accessor', 'is_inplace', 'get_timesteps',
           'numeric_index', 'numeric_offsets', 'inverse_dir_index', 'AccessPdfValues']
--- a/lbmpy/advanced_streaming/communication.py
+++ b/lbmpy/advanced_streaming/communication.py
 import itertools
-from pystencils import Field, Assignment
-from pystencils.slicing import shift_slice, get_slice_before_ghost_layer, normalize_slice
-from lbmpy.advanced_streaming.utility import is_inplace, get_accessor, numeric_index, \
-    numeric_offsets, Timestep, get_timesteps
-from lbmpy.stencils import get_stencil
+from pystencils import CreateKernelConfig, Field, Assignment, AssignmentCollection, Target
+from pystencils.slicing import (
+    shift_slice,
+    get_slice_before_ghost_layer,
+    normalize_slice,
+)
+from lbmpy.advanced_streaming.utility import (
+    is_inplace,
+    get_accessor,
+    numeric_index,
+    Timestep,
+    get_timesteps,
+    numeric_offsets,
+)
 from pystencils.datahandling import SerialDataHandling
 from itertools import chain


-def _trim_slice_in_direction(slices, direction):
-    assert len(slices) == len(direction)
+class LBMPeriodicityHandling:

-    result = []
-    for s, d in zip(slices, direction):
-        if isinstance(s, int):
-            result.append(s)
-            continue
-        start = s.start + 1 if d == -1 else s.start
-        stop = s.stop - 1 if d == 1 else s.stop
-        result.append(slice(start, stop, s.step))
+    def __init__(
+        self,
+        stencil,
+        data_handling,
+        pdf_field_name,
+        streaming_pattern="pull",
+        ghost_layers=1,
+        cupy_direct_copy=True,
+    ):
+        """
+        Periodicity Handling for Lattice Boltzmann Streaming.
+
+        **On the usage with cuda:**
+        - cupy allows the copying of sliced arrays within device memory using the numpy syntax,
+        e.g. `dst[:,0] = src[:,-1]`. In this implementation, this is the default for periodicity
+        handling. Alternatively, if you set `cupy_direct_copy=False`, GPU kernels are generated and
+        compiled. The compiled kernels are almost twice as fast in execution as cupy array copying,
+        but especially for large stencils like D3Q27, their compilation can take up to 20 seconds.
+        Choose your weapon depending on your use case.
+        """
+        if not isinstance(data_handling, SerialDataHandling):
+            raise ValueError("Only serial data handling is supported!")

-    return tuple(result)
+        self.stencil = stencil
+        self.dim = stencil.D
+        self.dh = data_handling

+        assert data_handling.default_target in [Target.CPU, Target.GPU]
+        self.target = data_handling.default_target

-def _extend_dir(direction):
-    if len(direction) == 0:
-        yield tuple()
-    elif direction[0] == 0:
-        for d in [-1, 0, 1]:
-            for rest in _extend_dir(direction[1:]):
-                yield (d, ) + rest
-    else:
-        for rest in _extend_dir(direction[1:]):
-            yield (direction[0], ) + rest
+        self.pdf_field_name = pdf_field_name
+        self.ghost_layers = ghost_layers
+        self.periodicity = data_handling.periodicity
+        self.inplace_pattern = is_inplace(streaming_pattern)

+        self.cpu = self.target == Target.CPU
+        self.cupy_direct_copy = self.target == Target.GPU and cupy_direct_copy

-def _get_neighbour_transform(direction, ghost_layers):
-    return tuple(d * (ghost_layers + 1) for d in direction)
+        def is_copy_direction(direction):
+            s = 0
+            for d, p in zip(direction, self.periodicity):
+                s += abs(d)
+                if d != 0 and not p:
+                    return False

+            return s != 0

-def _fix_length_one_slices(slices):
-    """Slices of length one are replaced by their start value for correct periodic shifting"""
-    if isinstance(slices, int):
-        return slices
-    elif isinstance(slices, slice):
-        if slices.stop is not None and abs(slices.start - slices.stop) == 1:
-            return slices.start
-        elif slices.stop is None and slices.start == -1:
-            return -1  # [-1:] also has length one
+        full_stencil = itertools.product(*([-1, 0, 1] for _ in range(self.dim)))
+        copy_directions = tuple(filter(is_copy_direction, full_stencil))
+        self.comm_slices = []
+        timesteps = get_timesteps(streaming_pattern)
+        for timestep in timesteps:
+            slices_per_comm_dir = get_communication_slices(
+                stencil=stencil,
+                comm_stencil=copy_directions,
+                streaming_pattern=streaming_pattern,
+                prev_timestep=timestep,
+                ghost_layers=ghost_layers,
+            )
+            self.comm_slices.append(
+                list(chain.from_iterable(v for k, v in slices_per_comm_dir.items()))
+            )
+
+        if self.target == Target.GPU and not cupy_direct_copy:
+            self.device_copy_kernels = list()
+            for timestep in timesteps:
+                self.device_copy_kernels.append(self._compile_copy_kernels(timestep))
+
+    def __call__(self, prev_timestep=Timestep.BOTH):
+        if self.cpu:
+            self._periodicity_handling_cpu(prev_timestep)
        else:
-            return slices
-    else:
-        return tuple(_fix_length_one_slices(s) for s in slices)
+            self._periodicity_handling_gpu(prev_timestep)
+
+    def _periodicity_handling_cpu(self, prev_timestep):
+        """Copy each source slice of the CPU PDF array onto its destination slice.
+
+        :param prev_timestep: its ``idx`` selects which precomputed slice list is used.
+        """
+        pdf_arr = self.dh.cpu_arrays[self.pdf_field_name]
+        for src_slice, dst_slice in self.comm_slices[prev_timestep.idx]:
+            pdf_arr[dst_slice] = pdf_arr[src_slice]
+
+    def _compile_copy_kernels(self, timestep):
+        """Create one copy kernel per (source, destination) slice pair of ``timestep``.
+
+        :param timestep: its ``idx`` selects the slice pairs kernels are created for.
+        :return: list of the results of ``periodic_pdf_gpu_copy_kernel``, in slice order.
+        """
+        assert self.target == Target.GPU
+        field = self.dh.fields[self.pdf_field_name]
+        return [
+            periodic_pdf_gpu_copy_kernel(field, src_slice, dst_slice)
+            for src_slice, dst_slice in self.comm_slices[timestep.idx]
+        ]
+
+    def _periodicity_handling_gpu(self, prev_timestep):
+        """Apply the periodic copies for ``prev_timestep`` to the GPU PDF array.
+
+        With ``cupy_direct_copy`` set, slices are assigned directly on the array;
+        otherwise the kernels stored in ``device_copy_kernels`` are called.
+
+        :param prev_timestep: its ``idx`` selects the slice list or kernel list to use.
+        """
+        gpu_arr = self.dh.gpu_arrays[self.pdf_field_name]
+        if not self.cupy_direct_copy:
+            call_args = {self.pdf_field_name: gpu_arr}
+            for copy_kernel in self.device_copy_kernels[prev_timestep.idx]:
+                copy_kernel(**call_args)
+            return
+
+        for src_slice, dst_slice in self.comm_slices[prev_timestep.idx]:
+            gpu_arr[dst_slice] = gpu_arr[src_slice]


 def get_communication_slices(
-        stencil, comm_stencil=None, streaming_pattern='pull', prev_timestep=Timestep.BOTH, ghost_layers=1):
+    stencil,
+    comm_stencil=None,
+    streaming_pattern="pull",
+    prev_timestep=Timestep.BOTH,
+    ghost_layers=1,
+):
    """
    Return the source and destination slices for periodicity handling or communication between blocks.

    :param stencil: The stencil used by the LB method.
-    :param comm_stencil: The stencil defining the communication directions. If None, it will be set to the 
+    :param comm_stencil: The stencil defining the communication directions. If None, it will be set to the
                         full stencil (D2Q9 in 2D, D3Q27 in 3D, etc.).
    :param streaming_pattern: The streaming pattern.
    :param prev_timestep: Timestep after which communication is run.
@@ -68,11 +137,12 @@ def get_communication_slices(

    """

-    dim = len(stencil[0])
    if comm_stencil is None:
-        comm_stencil = itertools.product(*([-1, 0, 1] for _ in range(dim)))
+        comm_stencil = itertools.product(*([-1, 0, 1] for _ in range(stencil.D)))

-    pdfs = Field.create_generic('pdfs', spatial_dimensions=len(stencil[0]), index_shape=(len(stencil),))
+    pdfs = Field.create_generic(
+        "pdfs", spatial_dimensions=len(stencil[0]), index_shape=(stencil.Q,)
+    )
    write_accesses = get_accessor(streaming_pattern, prev_timestep).write(pdfs, stencil)
    slices_per_comm_direction = dict()

@@ -84,19 +154,27 @@ def get_communication_slices(

        for streaming_dir in set(_extend_dir(comm_dir)) & set(stencil):
            d = stencil.index(streaming_dir)
-            write_offsets = numeric_offsets(write_accesses[d])
            write_index = numeric_index(write_accesses[d])[0]

+            origin_slice = get_slice_before_ghost_layer(
+                comm_dir, ghost_layers=ghost_layers, thickness=1
+            )
+            src_slice = _fix_length_one_slices(origin_slice)
+
+            write_offsets = numeric_offsets(write_accesses[d])
            tangential_dir = tuple(s - c for s, c in zip(streaming_dir, comm_dir))
-            origin_slice = get_slice_before_ghost_layer(comm_dir, ghost_layers=ghost_layers, thickness=1)
-            origin_slice = _fix_length_one_slices(origin_slice)
-            src_slice = shift_slice(_trim_slice_in_direction(origin_slice, tangential_dir), write_offsets)
+
+            # TODO: this is just a hotfix. _trim_slice_in_direction breaks FreeSlip BC with adjacent periodic side
+            if streaming_pattern != "pull":
+                src_slice = shift_slice(
+                    _trim_slice_in_direction(src_slice, tangential_dir), write_offsets
+                )

            neighbour_transform = _get_neighbour_transform(comm_dir, ghost_layers)
            dst_slice = shift_slice(src_slice, neighbour_transform)

-            src_slice = src_slice + (write_index, )
-            dst_slice = dst_slice + (write_index, )
+            src_slice = src_slice + (write_index,)
+            dst_slice = dst_slice + (write_index,)

            slices_for_dir.append((src_slice, dst_slice))

@@ -104,11 +182,10 @@ def get_communication_slices(
    return slices_per_comm_direction


-def periodic_pdf_copy_kernel(pdf_field, src_slice, dst_slice,
-                             domain_size=None, target='gpu',
-                             opencl_queue=None, opencl_ctx=None):
-    """Copies a rectangular array slice onto another non-overlapping array slice"""
-    from pystencils.gpucuda.kernelcreation import create_cuda_kernel
+def periodic_pdf_gpu_copy_kernel(pdf_field, src_slice, dst_slice, domain_size=None):
+    """Generate a GPU kernel which copies all values from one slice of a field
+    to another non-overlapping slice."""
+    from pystencils import create_kernel

    pdf_idx = src_slice[-1]
    assert isinstance(pdf_idx, int), "PDF index needs to be an integer constant"
@@ -116,6 +193,7 @@ def periodic_pdf_copy_kernel(pdf_field, src_slice, dst_slice,
    src_slice = src_slice[:-1]
    dst_slice = dst_slice[:-1]

+    # TODO: this is the domain_size including ghost layers (GL)
    if domain_size is None:
        domain_size = pdf_field.spatial_shape

@@ -128,120 +206,71 @@ def periodic_pdf_copy_kernel(pdf_field, src_slice, dst_slice,
    def _stop(s):
        return s.stop if isinstance(s, slice) else s

-    offset = [_start(s1) - _start(s2) for s1, s2 in zip(normalized_from_slice, normalized_to_slice)]
-    assert offset == [_stop(s1) - _stop(s2) for s1, s2 in zip(normalized_from_slice, normalized_to_slice)], \
-        "Slices have to have same size"
-
-    copy_eq = Assignment(pdf_field(pdf_idx), pdf_field[tuple(offset)](pdf_idx))
-    ast = create_cuda_kernel([copy_eq], iteration_slice=dst_slice, skip_independence_check=True)
-    if target == 'gpu':
-        from pystencils.gpucuda import make_python_function
-        return make_python_function(ast)
-    elif target == 'opencl':
-        from pystencils.opencl import make_python_function
-        return make_python_function(ast, opencl_queue, opencl_ctx)
-    else:
-        raise ValueError('Invalid target:', target)
-
-
-class LBMPeriodicityHandling:
+    offset = [
+        _start(s1) - _start(s2)
+        for s1, s2 in zip(normalized_from_slice, normalized_to_slice)
+    ]
+    assert offset == [
+        _stop(s1) - _stop(s2)
+        for s1, s2 in zip(normalized_from_slice, normalized_to_slice)
+    ], "Slices have to have same size"
+
+    copy_eq = AssignmentCollection(
+        main_assignments=[
+            Assignment(pdf_field(pdf_idx), pdf_field[tuple(offset)](pdf_idx))
+        ]
+    )
+    config = CreateKernelConfig(
+        iteration_slice=dst_slice,
+        skip_independence_check=True,
+        target=Target.GPU,
+    )
+
+    ast = create_kernel(copy_eq, config=config)
+    return ast.compile()

-    def __init__(self, stencil, data_handling, pdf_field_name,
-                 streaming_pattern='pull', ghost_layers=1,
-                 opencl_queue=None, opencl_ctx=None,
-                 pycuda_direct_copy=True):
-        """
-            Periodicity Handling for Lattice Boltzmann Streaming.
-
-            **On the usage with cuda/opencl:** 
-            - pycuda allows the copying of sliced arrays within device memory using the numpy syntax,
-            e.g. `dst[:,0] = src[:,-1]`. In this implementation, this is the default for periodicity
-            handling. Alternatively, if you set `pycuda_direct_copy=False`, GPU kernels are generated and
-            compiled. The compiled kernels are almost twice as fast in execution as pycuda array copying,
-            but especially for large stencils like D3Q27, their compilation can take up to 20 seconds. 
-            Choose your weapon depending on your use case.
-
-            - pyopencl does not support copying of non-contiguous sliced arrays, so the usage of compiled
-            copy kernels is forced on us. On the positive side, compilation of the OpenCL kernels appears
-            to be about four times faster.
-        """
-        if not isinstance(data_handling, SerialDataHandling):
-            raise ValueError('Only serial data handling is supported!')
-
-        if isinstance(stencil, str):
-            stencil = get_stencil(stencil)
-
-        self.stencil = stencil
-        self.dim = len(stencil[0])
-        self.dh = data_handling

-        target = data_handling.default_target
-        assert target in ['cpu', 'gpu', 'opencl']
-
-        self.pdf_field_name = pdf_field_name
-        self.ghost_layers = ghost_layers
-        periodicity = data_handling.periodicity
-        self.inplace_pattern = is_inplace(streaming_pattern)
-        self.target = target
-        self.cpu = target == 'cpu'
-        self.opencl_queue = opencl_queue
-        self.opencl_ctx = opencl_ctx
-        self.pycuda_direct_copy = target == 'gpu' and pycuda_direct_copy
+def _extend_dir(direction):
+    """Yield every direction tuple obtained from ``direction`` by replacing each
+    zero entry with -1, 0 and 1 in turn; non-zero entries are kept unchanged.
+
+    An empty ``direction`` yields a single empty tuple, which ends the recursion.
+    """
+    if len(direction) == 0:
+        yield tuple()
+    elif direction[0] == 0:
+        # Leading entry is zero: branch over all three possible values for it
+        for d in [-1, 0, 1]:
+            for rest in _extend_dir(direction[1:]):
+                yield (d,) + rest
+    else:
+        # Leading entry is non-zero: keep it and only extend the remaining entries
+        for rest in _extend_dir(direction[1:]):
+            yield (direction[0],) + rest

-        def is_copy_direction(direction):
-            s = 0
-            for d, p in zip(direction, periodicity):
-                s += abs(d)
-                if d != 0 and not p:
-                    return False

-            return s != 0
+def _get_neighbour_transform(direction, ghost_layers):
+    """Return a tuple with every component of ``direction`` multiplied by ``ghost_layers + 1``."""
+    return tuple(d * (ghost_layers + 1) for d in direction)

-        full_stencil = itertools.product(*([-1, 0, 1] for _ in range(self.dim)))
-        copy_directions = tuple(filter(is_copy_direction, full_stencil))
-        self.comm_slices = []
-        timesteps = get_timesteps(streaming_pattern)
-        for timestep in timesteps:
-            slices_per_comm_dir = get_communication_slices(stencil=stencil,
-                                                           comm_stencil=copy_directions,
-                                                           streaming_pattern=streaming_pattern,
-                                                           prev_timestep=timestep,
-                                                           ghost_layers=ghost_layers)
-            self.comm_slices.append(list(chain.from_iterable(v for k, v in slices_per_comm_dir.items())))
-
-        if target == 'opencl' or (target == 'gpu' and not pycuda_direct_copy):
-            self.device_copy_kernels = []
-            for timestep in timesteps:
-                self.device_copy_kernels.append(self._compile_copy_kernels(timestep))

-    def __call__(self, prev_timestep=Timestep.BOTH):
-        if self.cpu:
-            self._periodicity_handling_cpu(prev_timestep)
+def _fix_length_one_slices(slices):
+    """Slices of length one are replaced by their start value for correct periodic shifting"""
+    if isinstance(slices, int):
+        return slices
+    elif isinstance(slices, slice):
+        if slices.stop is not None and abs(slices.start - slices.stop) == 1:
+            return slices.start
+        elif slices.stop is None and slices.start == -1:
+            return -1  # [-1:] also has length one
        else:
-            self._periodicity_handling_gpu(prev_timestep)
+            return slices
+    else:
+        return tuple(_fix_length_one_slices(s) for s in slices)

-    def _periodicity_handling_cpu(self, prev_timestep):
-        arr = self.dh.cpu_arrays[self.pdf_field_name]
-        comm_slices = self.comm_slices[prev_timestep.idx]
-        for src, dst in comm_slices:
-            arr[dst] = arr[src]

-    def _compile_copy_kernels(self, timestep):
-        pdf_field = self.dh.fields[self.pdf_field_name]
-        kernels = []
-        for src, dst in self.comm_slices[timestep.idx]:
-            kernels.append(
-                periodic_pdf_copy_kernel(
-                    pdf_field, src, dst, target=self.target,
-                    opencl_queue=self.opencl_queue, opencl_ctx=self.opencl_ctx))
-        return kernels
+def _trim_slice_in_direction(slices, direction):
+    assert len(slices) == len(direction)

-    def _periodicity_handling_gpu(self, prev_timestep):
-        arr = self.dh.gpu_arrays[self.pdf_field_name]
-        if self.pycuda_direct_copy:
-            for src, dst in self.comm_slices[prev_timestep.idx]:
-                arr[dst] = arr[src]
-        else:
-            kernel_args = {self.pdf_field_name: arr}
-            for kernel in self.device_copy_kernels[prev_timestep.idx]:
-                kernel(**kernel_args)
+    result = []
+    for s, d in zip(slices, direction):
+        if isinstance(s, int):
+            result.append(s)
+            continue
+        start = s.start + 1 if d == -1 else s.start
+        stop = s.stop - 1 if d == 1 else s.stop
+        result.append(slice(start, stop, s.step))
+
+    return tuple(result)
--- a/lbmpy/advanced_streaming/indexing.py
+++ b/lbmpy/advanced_streaming/indexing.py
@@ -2,19 +2,20 @@ import numpy as np
 import sympy as sp
 import pystencils as ps

-from pystencils.data_types import TypedSymbol, create_type
-from pystencils.backends.cbackend import CustomCodeNode
+from .._compat import IS_PYSTENCILS_2

-from lbmpy.stencils import get_stencil
-from lbmpy.advanced_streaming.utility import get_accessor, inverse_dir_index, is_inplace, Timestep
+if IS_PYSTENCILS_2:
+    from pystencils import TypedSymbol, create_type
+    from pystencils.types.quick import Arr
+    from lbmpy.lookup_tables import TranslationArraysNode
+else:
+    from pystencils.typing import TypedSymbol, create_type
+    from ..custom_code_nodes import TranslationArraysNode

+from lbmpy.advanced_streaming.utility import get_accessor, inverse_dir_index, is_inplace, Timestep
 from itertools import product


-def _array_pattern(dtype, name, content):
-    return f"const {str(dtype)} {name} [] = {{ {','.join(str(c) for c in content)} }}; \n"
-
-
 class BetweenTimestepsIndexing:

    #   ==============================================
@@ -31,7 +32,7 @@ class BetweenTimestepsIndexing:

    @property
    def inverse_dir_symbol(self):
-        """Symbol denoting the inversion of a PDF field index. 
+        """Symbol denoting the inversion of a PDF field index.
        Use only at top-level of index to f_out or f_in, otherwise it can't be correctly replaced."""
        return sp.IndexedBase('invdir')

@@ -40,14 +41,11 @@ class BetweenTimestepsIndexing:
    #   =============================

    def __init__(self, pdf_field, stencil, prev_timestep=Timestep.BOTH, streaming_pattern='pull',
-                 index_dtype=np.int64, offsets_dtype=np.int64):
+                 index_dtype=np.int32, offsets_dtype=np.int32):
        if prev_timestep == Timestep.BOTH and is_inplace(streaming_pattern):
            raise ValueError('Cannot create index arrays for both kinds of timesteps for inplace streaming pattern '
                             + streaming_pattern)

-        if isinstance(stencil, str):
-            stencil = get_stencil(stencil)
-
        prev_accessor = get_accessor(streaming_pattern, prev_timestep)
        next_accessor = get_accessor(streaming_pattern, prev_timestep.next())

@@ -58,8 +56,8 @@ class BetweenTimestepsIndexing:

        self._pdf_field = pdf_field
        self._stencil = stencil
-        self._dim = len(stencil[0])
-        self._q = len(stencil)
+        self._dim = stencil.D
+        self._q = stencil.Q
        self._coordinate_names = ['x', 'y', 'z'][:self._dim]

        self._index_dtype = create_type(index_dtype)
@@ -73,13 +71,21 @@ class BetweenTimestepsIndexing:
        assert f_dir in ['in', 'out']
        inv = '_inv' if inverse else ''
        name = f"f_{f_dir}{inv}_dir_idx"
-        return TypedSymbol(name, self._index_dtype)
+        if IS_PYSTENCILS_2:
+            return TypedSymbol(name, Arr(self._index_dtype, self._q))
+        else:
+            return TypedSymbol(name, self._index_dtype)

    def _offset_array_symbols(self, f_dir, inverse):
        assert f_dir in ['in', 'out']
        inv = '_inv' if inverse else ''
        name_base = f"f_{f_dir}{inv}_offsets_"
-        symbols = [TypedSymbol(name_base + d, self._index_dtype) for d in self._coordinate_names]
+
+        if IS_PYSTENCILS_2:
+            symbols = [TypedSymbol(name_base + d, Arr(self._index_dtype, self._q)) for d in self._coordinate_names]
+        else:
+            symbols = [TypedSymbol(name_base + d, self._index_dtype) for d in self._coordinate_names]
+        
        return symbols

    def _array_symbols(self, f_dir, inverse, index):
@@ -172,62 +178,31 @@ class BetweenTimestepsIndexing:
        return trivial_index_translations, trivial_offset_translations

    def create_code_node(self):
-        return BetweenTimestepsIndexing.TranslationArraysNode(self)
-
-    class TranslationArraysNode(CustomCodeNode):
-
-        def __init__(self, indexing):
-            code = ''
-            symbols_defined = set()
-
-            for f_dir, inv in indexing._required_index_arrays:
-                indices, offsets = indexing._get_translated_indices_and_offsets(f_dir, inv)
-                index_array_symbol = indexing._index_array_symbol(f_dir, inv)
-                symbols_defined.add(index_array_symbol)
-                code += _array_pattern(indexing._index_dtype, index_array_symbol.name, indices)
-
-            for f_dir, inv in indexing._required_offset_arrays:
-                indices, offsets = indexing._get_translated_indices_and_offsets(f_dir, inv)
-                offset_array_symbols = indexing._offset_array_symbols(f_dir, inv)
-                symbols_defined |= set(offset_array_symbols)
-                for d, arrsymb in enumerate(offset_array_symbols):
-                    code += _array_pattern(indexing._offsets_dtype, arrsymb.name, offsets[d])
-
-            super(BetweenTimestepsIndexing.TranslationArraysNode, self).__init__(
-                code, symbols_read=set(), symbols_defined=symbols_defined)
-
-        def __str__(self):
-            return "Variable PDF Access Translation Arrays"
-
-        def __repr__(self):
-            return "Variable PDF Access Translation Arrays"
-
-#   end class AdvancedStreamingIndexing
-
+        array_content = list()
+        symbols_defined = set()
+        for f_dir, inv in self._required_index_arrays:
+            indices, offsets = self._get_translated_indices_and_offsets(f_dir, inv)
+            index_array_symbol = self._index_array_symbol(f_dir, inv)
+            symbols_defined.add(index_array_symbol)

-class NeighbourOffsetArrays(CustomCodeNode):
+            if IS_PYSTENCILS_2:
+                array_content.append((index_array_symbol, indices))
+            else:
+                array_content.append((self._index_dtype, index_array_symbol.name, indices))

-    @staticmethod
-    def neighbour_offset(dir_idx, stencil):
-        if isinstance(sp.sympify(dir_idx), sp.Integer):
-            return stencil[dir_idx]
+        for f_dir, inv in self._required_offset_arrays:
+            indices, offsets = self._get_translated_indices_and_offsets(f_dir, inv)
+            offset_array_symbols = self._offset_array_symbols(f_dir, inv)
+            symbols_defined |= set(offset_array_symbols)
+            for d, arrsymb in enumerate(offset_array_symbols):
+                if IS_PYSTENCILS_2:
+                    array_content.append((arrsymb, offsets[d]))
+                else:
+                    array_content.append((self._offsets_dtype, arrsymb.name, offsets[d]))
+
+        if IS_PYSTENCILS_2:
+            return TranslationArraysNode(array_content)
        else:
-            return tuple([sp.IndexedBase(symbol, shape=(1,))[dir_idx]
-                         for symbol in NeighbourOffsetArrays._offset_symbols(len(stencil[0]))])
-
-    @staticmethod
-    def _offset_symbols(dim):
-        return [TypedSymbol(f"neighbour_offset_{d}", create_type(np.int64)) for d in ['x', 'y', 'z'][:dim]]
+            return TranslationArraysNode(array_content, symbols_defined)

-    def __init__(self, stencil, offsets_dtype=np.int64):
-        offsets_dtype = create_type(offsets_dtype)
-        dim = len(stencil[0])
-
-        array_symbols = NeighbourOffsetArrays._offset_symbols(dim)
-        code = "\n"
-        for i, arrsymb in enumerate(array_symbols):
-            code += _array_pattern(offsets_dtype, arrsymb.name, (d[i] for d in stencil))
-
-        offset_symbols = NeighbourOffsetArrays._offset_symbols(dim)
-        super(NeighbourOffsetArrays, self).__init__(code, symbols_read=set(),
-                                                    symbols_defined=set(offset_symbols))
+#   end class BetweenTimestepsIndexing
--- a/lbmpy/advanced_streaming/utility.py
+++ b/lbmpy/advanced_streaming/utility.py
@@ -4,7 +4,11 @@ from lbmpy.fieldaccess import PdfFieldAccessor, \
    AAEvenTimeStepAccessor, \
    AAOddTimeStepAccessor, \
    EsoTwistEvenTimeStepAccessor, \
-    EsoTwistOddTimeStepAccessor
+    EsoTwistOddTimeStepAccessor, \
+    EsoPullEvenTimeStepAccessor, \
+    EsoPullOddTimeStepAccessor, \
+    EsoPushEvenTimeStepAccessor, \
+    EsoPushOddTimeStepAccessor

 import numpy as np
 import pystencils as ps
@@ -33,41 +37,48 @@ class Timestep(IntEnum):
            return 'Both'


-streaming_patterns = ['push', 'pull', 'aa', 'esotwist']
+streaming_patterns = ['push', 'pull', 'aa', 'esotwist', 'esopull', 'esopush']

 even_accessors = {
    'pull': StreamPullTwoFieldsAccessor,
    'push': StreamPushTwoFieldsAccessor,
    'aa': AAEvenTimeStepAccessor,
-    'esotwist': EsoTwistEvenTimeStepAccessor
+    'esotwist': EsoTwistEvenTimeStepAccessor,
+    'esopull': EsoPullEvenTimeStepAccessor,
+    'esopush': EsoPushEvenTimeStepAccessor
 }

 odd_accessors = {
    'pull': StreamPullTwoFieldsAccessor,
    'push': StreamPushTwoFieldsAccessor,
    'aa': AAOddTimeStepAccessor,
-    'esotwist': EsoTwistOddTimeStepAccessor
+    'esotwist': EsoTwistOddTimeStepAccessor,
+    'esopull': EsoPullOddTimeStepAccessor,
+    'esopush': EsoPushOddTimeStepAccessor
 }


+def is_inplace(streaming_pattern):
+    """Return True if ``streaming_pattern`` is 'aa', 'esotwist', 'esopull' or 'esopush'.
+
+    Raises:
+        ValueError: if ``streaming_pattern`` is not listed in ``streaming_patterns``.
+    """
+    if streaming_pattern not in streaming_patterns:
+        raise ValueError('Invalid streaming pattern', streaming_pattern)
+
+    return streaming_pattern in ['aa', 'esotwist', 'esopull', 'esopush']
+
+
 def get_accessor(streaming_pattern: str, timestep: Timestep) -> PdfFieldAccessor:
    if streaming_pattern not in streaming_patterns:
        raise ValueError(
            "Invalid value of parameter 'streaming_pattern'.", streaming_pattern)

+    if is_inplace(streaming_pattern) and (timestep == Timestep.BOTH):
+        raise ValueError(f"Invalid timestep for streaming pattern {streaming_pattern}: {str(timestep)}")
+
    if timestep == Timestep.EVEN:
        return even_accessors[streaming_pattern]
    else:
        return odd_accessors[streaming_pattern]


-def is_inplace(streaming_pattern):
-    if streaming_pattern not in streaming_patterns:
-        raise ValueError('Invalid streaming pattern', streaming_pattern)
-
-    return streaming_pattern in ['aa', 'esotwist']
-
-
 def get_timesteps(streaming_pattern):
    return (Timestep.EVEN, Timestep.ODD) if is_inplace(streaming_pattern) else (Timestep.BOTH, )

@@ -94,7 +105,7 @@ class AccessPdfValues:
        if streaming_dir not in ['in', 'out']:
            raise ValueError('Invalid streaming direction.', streaming_dir)

-        pdf_field = ps.Field.create_generic('pdfs', len(stencil[0]), index_shape=(len(stencil),))
+        pdf_field = ps.Field.create_generic('pdfs', len(stencil[0]), index_shape=(stencil.Q,))

        if accessor is None:
            accessor = get_accessor(streaming_pattern, timestep)

--- a/src/lbmpy/analytical_solutions.py
+++ b/src/lbmpy/analytical_solutions.py
+from typing import Union
+from numpy.typing import NDArray
+
+
+def poiseuille_flow(middle_distance: Union[float, NDArray], height,
+                    ext_force_density: float, dyn_visc: float) -> Union[float, NDArray]:
+    """
+    Analytical solution for plane Poiseuille flow.
+
+    Evaluates ``ext_force_density / (2 * dyn_visc) * (height**2 / 4 - middle_distance**2)``,
+    which vanishes where ``abs(middle_distance) == height / 2``.
+
+    Args:
+        middle_distance: Distance to the middle plane of the channel.
+        height: Distance between the boundaries.
+        ext_force_density: External force density acting on the fluid.
+        dyn_visc: Dynamic viscosity of the fluid.
+
+    Returns:
+        The velocity at the position given by middle_distance; a numpy array holding the
+        Poiseuille profile if middle_distance is given as an array.
+    """
+    # NOTE(review): ext_force_density was previously documented as acting "normal to the boundaries";
+    # a parabolic profile of this form is usually driven by a force parallel to them - confirm.
+    return ext_force_density * 1. / (2 * dyn_visc) * (height**2.0 / 4.0 - middle_distance**2.0)
--- a/src/lbmpy/boundaries/__init__.py
+++ b/src/lbmpy/boundaries/__init__.py
+from lbmpy.boundaries.boundaryconditions import (
+    UBB, FixedDensity, DiffusionDirichlet, SimpleExtrapolationOutflow, WallFunctionBounce,
+    ExtrapolationOutflow, NeumannByCopy, NoSlip, NoSlipLinearBouzidi, QuadraticBounceBack, StreamInConstant, FreeSlip)
+from lbmpy.boundaries.boundaryhandling import LatticeBoltzmannBoundaryHandling
+from lbmpy.boundaries.wall_function_models import MoninObukhovSimilarityTheory, LogLaw, MuskerLaw, SpaldingsLaw
+
+__all__ = ['NoSlip', 'NoSlipLinearBouzidi', 'QuadraticBounceBack', 'FreeSlip', 'WallFunctionBounce',
+           'UBB', 'FixedDensity',
+           'SimpleExtrapolationOutflow', 'ExtrapolationOutflow',
+           'DiffusionDirichlet', 'NeumannByCopy', 'StreamInConstant',
+           'LatticeBoltzmannBoundaryHandling',
+           'MoninObukhovSimilarityTheory', 'LogLaw', 'MuskerLaw', 'SpaldingsLaw']
No results found