diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 411a9bfe9db080b1196e2bf1583a7c51d478f941..dcc80a139b87edb14a5418f5928b16804e256c91 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -19,7 +19,7 @@ stages:
 
 .build_template:
    script:
-      - pip install -I cmake==3.16.3
+      - pip install -I cmake==3.16.3 jinja2
       - export NUM_CORES=$(nproc --all)
       - export MAX_BUILD_CORES=$(( $(awk '( $1 == "MemTotal:" ) { print $2 }' /proc/meminfo) / ( 4 * 1024 * 1024  ) ))
       - "[[ $MAX_BUILD_CORES -lt $NUM_CORES ]] && export NUM_BUILD_CORES=$MAX_BUILD_CORES || export NUM_BUILD_CORES=$NUM_CORES"
@@ -47,7 +47,7 @@ stages:
         -DWALBERLA_BUILD_WITH_OPENMP=$WALBERLA_BUILD_WITH_OPENMP
         -DCMAKE_BUILD_TYPE=$CMAKE_BUILD_TYPE -DMPIEXEC_PREFLAGS=$MPIEXEC_PREFLAGS
         -DWALBERLA_DOUBLE_ACCURACY=$WALBERLA_DOUBLE_ACCURACY
-        -DWARNING_ERROR=ON
+        -DWARNING_ERROR=$WARNING_ERROR
         -DWALBERLA_BUILD_WITH_METIS=$WALBERLA_BUILD_WITH_METIS
         -DWALBERLA_BUILD_WITH_PARMETIS=$WALBERLA_BUILD_WITH_PARMETIS
         -DWALBERLA_BUILD_WITH_FFTW=$WALBERLA_BUILD_WITH_FFTW
@@ -77,6 +77,7 @@ stages:
       WALBERLA_BUILD_WITH_FFTW: "ON"
       WALBERLA_ENABLE_GUI: "OFF"
       WALBERLA_LOGLEVEL: "DETAIL"
+      WARNING_ERROR: "ON"
    artifacts:
       when: always
       reports:
@@ -93,341 +94,105 @@ stages:
 
 
 
-intel_19_serial:
+icc_2022_serial:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:19
-   variables:
-      WALBERLA_BUILD_WITH_CUDA: "ON"
-      WALBERLA_BUILD_WITH_MPI: "OFF"
-      WALBERLA_BUILD_WITH_OPENMP: "OFF"
-      WALBERLA_BUILD_WITH_PARMETIS: "OFF"
-      CMAKE_CXX_FLAGS: "-qoverride-limits"
-   only:
-      variables:
-         - $ENABLE_NIGHTLY_BUILDS
-   tags:
-      - cuda
-      - docker
-      - intel
-
-intel_19_mpionly:
-   extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:19
-   variables:
-      WALBERLA_BUILD_WITH_CUDA: "ON"
-      WALBERLA_BUILD_WITH_OPENMP: "OFF"
-      CMAKE_CXX_FLAGS: "-qoverride-limits"
-   only:
-      variables:
-         - $ENABLE_NIGHTLY_BUILDS
-   tags:
-      - cuda
-      - docker
-      - intel
-
-intel_19_hybrid:
-   extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:19
-   variables:
-      WALBERLA_BUILD_WITH_CUDA: "ON"
-      CMAKE_CXX_FLAGS: "-qoverride-limits"
-   only:
-      variables:
-         - $ENABLE_NIGHTLY_BUILDS
-   tags:
-      - cuda
-      - docker
-      - intel
-
-intel_19_serial_dbg:
-   extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:19
-   variables:
-      WALBERLA_BUILD_WITH_CUDA: "ON"
-      WALBERLA_BUILD_WITH_MPI: "OFF"
-      WALBERLA_BUILD_WITH_OPENMP: "OFF"
-      WALBERLA_BUILD_WITH_PARMETIS: "OFF"
-      CMAKE_BUILD_TYPE: "DebugOptimized"
-      CMAKE_CXX_FLAGS: "-qoverride-limits"
-   only:
-      variables:
-         - $ENABLE_NIGHTLY_BUILDS
-   tags:
-      - cuda
-      - docker
-      - intel
-
-intel_19_mpionly_dbg:
-   extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:19
-   variables:
-      WALBERLA_BUILD_WITH_CUDA: "ON"
-      CMAKE_BUILD_TYPE: "DebugOptimized"
-      WALBERLA_BUILD_WITH_OPENMP: "OFF"
-      CMAKE_CXX_FLAGS: "-qoverride-limits"
-   only:
-      variables:
-         - $ENABLE_NIGHTLY_BUILDS
-   tags:
-      - cuda
-      - docker
-      - intel
-
-intel_19_hybrid_dbg:
-   extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:19
-   variables:
-      WALBERLA_BUILD_WITH_CUDA: "ON"
-      CMAKE_BUILD_TYPE: "DebugOptimized"
-      CMAKE_CXX_FLAGS: "-qoverride-limits"
-   tags:
-      - cuda
-      - docker
-      - intel
-
-intel_19_hybrid_dbg_sp:
-   extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:19
-   variables:
-      WALBERLA_BUILD_WITH_CUDA: "ON"
-      CMAKE_BUILD_TYPE: "DebugOptimized"
-      WALBERLA_DOUBLE_ACCURACY: "OFF"
-      WALBERLA_BUILD_WITH_PARMETIS: "OFF"
-      WALBERLA_BUILD_WITH_METIS: "OFF"
-      CMAKE_CXX_FLAGS: "-qoverride-limits"
-   only:
-      variables:
-         - $ENABLE_NIGHTLY_BUILDS
-   tags:
-      - cuda
-      - docker
-      - intel
-
-intel_20_serial:
-   extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:20
-   variables:
-      WALBERLA_BUILD_WITH_CUDA: "ON"
-      WALBERLA_BUILD_WITH_MPI: "OFF"
-      WALBERLA_BUILD_WITH_OPENMP: "OFF"
-      WALBERLA_BUILD_WITH_PARMETIS: "OFF"
-      CMAKE_CXX_FLAGS: "-qoverride-limits"
-      WALBERLA_BUILD_WITH_METIS: "OFF"
-      WALBERLA_BUILD_WITH_PARMETIS: "OFF"
-   only:
-      variables:
-         - $ENABLE_NIGHTLY_BUILDS
-   tags:
-      - cuda
-      - docker
-      - intel
-
-intel_20_mpionly:
-   extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:20
-   variables:
-      WALBERLA_BUILD_WITH_CUDA: "ON"
-      WALBERLA_BUILD_WITH_OPENMP: "OFF"
-      CMAKE_CXX_FLAGS: "-qoverride-limits"
-      WALBERLA_BUILD_WITH_METIS: "OFF"
-      WALBERLA_BUILD_WITH_PARMETIS: "OFF"
-   only:
-      variables:
-         - $ENABLE_NIGHTLY_BUILDS
-   tags:
-      - cuda
-      - docker
-      - intel
-
-intel_20_hybrid:
-   extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:20
-   variables:
-      WALBERLA_BUILD_WITH_CUDA: "ON"
-      CMAKE_CXX_FLAGS: "-qoverride-limits"
-      WALBERLA_BUILD_WITH_METIS: "OFF"
-      WALBERLA_BUILD_WITH_PARMETIS: "OFF"
-   tags:
-      - cuda
-      - docker
-      - intel
-
-intel_20_serial_dbg:
-   extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:20
-   variables:
-      WALBERLA_BUILD_WITH_CUDA: "ON"
-      WALBERLA_BUILD_WITH_MPI: "OFF"
-      WALBERLA_BUILD_WITH_OPENMP: "OFF"
-      WALBERLA_BUILD_WITH_PARMETIS: "OFF"
-      CMAKE_BUILD_TYPE: "DebugOptimized"
-      CMAKE_CXX_FLAGS: "-qoverride-limits"
-      WALBERLA_BUILD_WITH_METIS: "OFF"
-      WALBERLA_BUILD_WITH_PARMETIS: "OFF"
-   tags:
-      - cuda
-      - docker
-      - intel
-
-intel_20_mpionly_dbg:
-   extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:20
-   variables:
-      WALBERLA_BUILD_WITH_CUDA: "ON"
-      CMAKE_BUILD_TYPE: "DebugOptimized"
-      WALBERLA_BUILD_WITH_OPENMP: "OFF"
-      CMAKE_CXX_FLAGS: "-qoverride-limits"
-      WALBERLA_BUILD_WITH_METIS: "OFF"
-      WALBERLA_BUILD_WITH_PARMETIS: "OFF"
-   tags:
-      - cuda
-      - docker
-      - intel
-
-intel_20_hybrid_dbg:
-   extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:20
-   variables:
-      WALBERLA_BUILD_WITH_CUDA: "ON"
-      CMAKE_BUILD_TYPE: "DebugOptimized"
-      CMAKE_CXX_FLAGS: "-qoverride-limits"
-      WALBERLA_BUILD_WITH_METIS: "OFF"
-      WALBERLA_BUILD_WITH_PARMETIS: "OFF"
-   tags:
-      - cuda
-      - docker
-      - intel
-
-intel_20_hybrid_dbg_sp:
-   extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:20
-   variables:
-      WALBERLA_BUILD_WITH_CUDA: "ON"
-      CMAKE_BUILD_TYPE: "DebugOptimized"
-      WALBERLA_DOUBLE_ACCURACY: "OFF"
-      WALBERLA_BUILD_WITH_PARMETIS: "OFF"
-      WALBERLA_BUILD_WITH_METIS: "OFF"
-      CMAKE_CXX_FLAGS: "-qoverride-limits"
-      WALBERLA_BUILD_WITH_METIS: "OFF"
-      WALBERLA_BUILD_WITH_PARMETIS: "OFF"
-   tags:
-      - cuda
-      - docker
-      - intel
-
-gcc_7_serial:
-   extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:7
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/icc-2022
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_MPI: "OFF"
       WALBERLA_BUILD_WITH_OPENMP: "OFF"
       WALBERLA_BUILD_WITH_PARMETIS: "OFF"
+      WARNING_ERROR: "OFF"
    only:
       variables:
          - $ENABLE_NIGHTLY_BUILDS
    tags:
-      - cuda
+      - cuda11
       - docker
 
-gcc_7_mpionly:
+icc_2022_mpionly:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:7
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/icc-2022
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_OPENMP: "OFF"
+      WARNING_ERROR: "OFF"
    only:
       variables:
          - $ENABLE_NIGHTLY_BUILDS
    tags:
-      - cuda
+      - cuda11
       - docker
 
-gcc_7_hybrid:
+icc_2022_hybrid:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:7
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/icc-2022
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
-   only:
-      variables:
-         - $ENABLE_NIGHTLY_BUILDS
+      WARNING_ERROR: "OFF"
    tags:
-      - cuda
+      - cuda11
       - docker
 
-gcc_7_serial_dbg:
+icc_2022_serial_dbg:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:7
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/icc-2022
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_MPI: "OFF"
       WALBERLA_BUILD_WITH_OPENMP: "OFF"
       WALBERLA_BUILD_WITH_PARMETIS: "OFF"
       CMAKE_BUILD_TYPE: "DebugOptimized"
-   only:
-      variables:
-         - $ENABLE_NIGHTLY_BUILDS
+      WARNING_ERROR: "OFF"
    tags:
-      - cuda
+      - cuda11
       - docker
 
-gcc_7_mpionly_dbg:
+icc_2022_mpionly_dbg:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:7
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/icc-2022
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
       WALBERLA_BUILD_WITH_OPENMP: "OFF"
-   only:
-      variables:
-         - $ENABLE_NIGHTLY_BUILDS
+      WARNING_ERROR: "OFF"
    tags:
-      - cuda
+      - cuda11
       - docker
 
-gcc_7_hybrid_dbg:
+icc_2022_hybrid_dbg:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:7
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/icc-2022
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
+      WARNING_ERROR: "OFF"
    tags:
-      - cuda
+      - cuda11
       - docker
 
-gcc_7_hybrid_dbg_sp:
+icc_2022_hybrid_dbg_sp:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:7
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/icc-2022
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
       WALBERLA_DOUBLE_ACCURACY: "OFF"
       WALBERLA_BUILD_WITH_PARMETIS: "OFF"
       WALBERLA_BUILD_WITH_METIS: "OFF"
-   only:
-      variables:
-         - $ENABLE_NIGHTLY_BUILDS
+      WARNING_ERROR: "OFF"
    tags:
-      - cuda
+      - cuda11
       - docker
 
-gcc_8_serial:
+icx_2022_serial:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:8
-   before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
-      - cd python
-      - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
-      - pip3 list
-      - cd ..
-      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/icx-2022
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_MPI: "OFF"
       WALBERLA_BUILD_WITH_OPENMP: "OFF"
       WALBERLA_BUILD_WITH_PARMETIS: "OFF"
-      WALBERLA_BUILD_WITH_CODEGEN: "ON"
-      WALBERLA_BUILD_WITH_PYTHON: "ON"
    only:
       variables:
          - $ENABLE_NIGHTLY_BUILDS
@@ -435,21 +200,12 @@ gcc_8_serial:
       - cuda11
       - docker
 
-gcc_8_mpionly:
+icx_2022_mpionly:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:8
-   before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
-      - cd python
-      - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
-      - pip3 list
-      - cd ..
-      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/icx-2022
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_OPENMP: "OFF"
-      WALBERLA_BUILD_WITH_CODEGEN: "ON"
-      WALBERLA_BUILD_WITH_PYTHON: "ON"
    only:
       variables:
          - $ENABLE_NIGHTLY_BUILDS
@@ -457,127 +213,67 @@ gcc_8_mpionly:
       - cuda11
       - docker
 
-gcc_8_hybrid:
+icx_2022_hybrid:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:8
-   before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
-      - cd python
-      - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
-      - pip3 list
-      - cd ..
-      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/icx-2022
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
-      WALBERLA_BUILD_WITH_CODEGEN: "ON"
-      WALBERLA_BUILD_WITH_PYTHON: "ON"
-   only:
-      variables:
-         - $ENABLE_NIGHTLY_BUILDS
    tags:
       - cuda11
       - docker
 
-gcc_8_serial_dbg:
+icx_2022_serial_dbg:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:8
-   before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
-      - cd python
-      - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
-      - pip3 list
-      - cd ..
-      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/icx-2022
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_MPI: "OFF"
       WALBERLA_BUILD_WITH_OPENMP: "OFF"
       WALBERLA_BUILD_WITH_PARMETIS: "OFF"
       CMAKE_BUILD_TYPE: "DebugOptimized"
-      WALBERLA_BUILD_WITH_CODEGEN: "ON"
-      WALBERLA_BUILD_WITH_PYTHON: "ON"
-   only:
-      variables:
-         - $ENABLE_NIGHTLY_BUILDS
    tags:
       - cuda11
       - docker
 
-gcc_8_mpionly_dbg:
+icx_2022_mpionly_dbg:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:8
-   before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
-      - cd python
-      - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
-      - pip3 list
-      - cd ..
-      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/icx-2022
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
       WALBERLA_BUILD_WITH_OPENMP: "OFF"
-      WALBERLA_BUILD_WITH_CODEGEN: "ON"
-      WALBERLA_BUILD_WITH_PYTHON: "ON"
-   only:
-      variables:
-         - $ENABLE_NIGHTLY_BUILDS
    tags:
       - cuda11
       - docker
 
-gcc_8_hybrid_dbg:
+icx_2022_hybrid_dbg:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:8
-   before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
-      - cd python
-      - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
-      - pip3 list
-      - cd ..
-      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/icx-2022
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
-      WALBERLA_BUILD_WITH_CODEGEN: "ON"
-      WALBERLA_BUILD_WITH_PYTHON: "ON"
-   only:
-      variables:
-         - $ENABLE_NIGHTLY_BUILDS
    tags:
       - cuda11
       - docker
 
-gcc_8_hybrid_dbg_sp:
+icx_2022_hybrid_dbg_sp:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:8
-   before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
-      - cd python
-      - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
-      - pip3 list
-      - cd ..
-      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/icx-2022
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
       WALBERLA_DOUBLE_ACCURACY: "OFF"
       WALBERLA_BUILD_WITH_PARMETIS: "OFF"
       WALBERLA_BUILD_WITH_METIS: "OFF"
-      WALBERLA_BUILD_WITH_CODEGEN: "ON"
-      WALBERLA_BUILD_WITH_PYTHON: "ON"
-   only:
-      variables:
-         - $ENABLE_NIGHTLY_BUILDS
    tags:
       - cuda11
       - docker
 
 gcc_9_serial:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:9
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-9
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -599,9 +295,9 @@ gcc_9_serial:
 
 gcc_9_mpionly:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:9
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-9
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -621,9 +317,9 @@ gcc_9_mpionly:
 
 gcc_9_hybrid:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:9
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-9
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -642,9 +338,9 @@ gcc_9_hybrid:
 
 gcc_9_serial_dbg:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:9
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-9
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -667,9 +363,9 @@ gcc_9_serial_dbg:
 
 gcc_9_mpionly_dbg:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:9
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-9
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -690,9 +386,9 @@ gcc_9_mpionly_dbg:
 
 gcc_9_hybrid_dbg:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:9
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-9
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -703,18 +399,15 @@ gcc_9_hybrid_dbg:
       CMAKE_BUILD_TYPE: "DebugOptimized"
       WALBERLA_BUILD_WITH_CODEGEN: "ON"
       WALBERLA_BUILD_WITH_PYTHON: "ON"
-   only:
-      variables:
-         - $ENABLE_NIGHTLY_BUILDS
    tags:
       - cuda11
       - docker
 
 gcc_9_hybrid_dbg_sp:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:9
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-9
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -737,9 +430,9 @@ gcc_9_hybrid_dbg_sp:
 
 gcc_10_serial:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:10
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-10
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -761,9 +454,9 @@ gcc_10_serial:
 
 gcc_10_mpionly:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:10
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-10
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -783,9 +476,9 @@ gcc_10_mpionly:
 
 gcc_10_hybrid:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:10
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-10
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -804,9 +497,9 @@ gcc_10_hybrid:
 
 gcc_10_serial_dbg:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:10
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-10
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -829,9 +522,9 @@ gcc_10_serial_dbg:
 
 gcc_10_mpionly_dbg:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:10
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-10
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -852,9 +545,9 @@ gcc_10_mpionly_dbg:
 
 gcc_10_hybrid_dbg:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:10
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-10
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -874,9 +567,9 @@ gcc_10_hybrid_dbg:
 
 gcc_10_hybrid_dbg_sp:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:10
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-10
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -899,9 +592,9 @@ gcc_10_hybrid_dbg_sp:
 
 gcc_11_serial:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:11
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-11
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -923,9 +616,9 @@ gcc_11_serial:
 
 gcc_11_mpionly:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:11
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-11
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -945,9 +638,9 @@ gcc_11_mpionly:
 
 gcc_11_hybrid:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:11
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-11
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -966,9 +659,9 @@ gcc_11_hybrid:
 
 gcc_11_serial_dbg:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:11
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-11
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -991,9 +684,9 @@ gcc_11_serial_dbg:
 
 gcc_11_mpionly_dbg:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:11
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-11
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1014,9 +707,9 @@ gcc_11_mpionly_dbg:
 
 gcc_11_hybrid_dbg:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:11
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-11
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1036,9 +729,9 @@ gcc_11_hybrid_dbg:
 
 gcc_11_hybrid_dbg_sp:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:11
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-11
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1061,7 +754,7 @@ gcc_11_hybrid_dbg_sp:
 
 gcc_12_serial:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:12
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-12
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_MPI: "OFF"
@@ -1076,7 +769,7 @@ gcc_12_serial:
 
 gcc_12_mpionly:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:12
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-12
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_OPENMP: "OFF"
@@ -1089,7 +782,7 @@ gcc_12_mpionly:
 
 gcc_12_hybrid:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:12
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-12
    stage: pretest
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
@@ -1099,7 +792,7 @@ gcc_12_hybrid:
 
 gcc_12_serial_dbg:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:12
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-12
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_MPI: "OFF"
@@ -1112,7 +805,7 @@ gcc_12_serial_dbg:
 
 gcc_12_mpionly_dbg:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:12
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-12
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
@@ -1123,7 +816,7 @@ gcc_12_mpionly_dbg:
 
 gcc_12_hybrid_dbg:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:12
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-12
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
@@ -1133,7 +826,7 @@ gcc_12_hybrid_dbg:
 
 gcc_12_hybrid_dbg_sp:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:12
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-12
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
@@ -1144,206 +837,170 @@ gcc_12_hybrid_dbg_sp:
       - cuda11
       - docker
 
-clang_9.0_serial:
+clang_12_serial:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:9.0
-   variables:
-      WALBERLA_BUILD_WITH_CUDA: "ON"
-      WALBERLA_BUILD_WITH_MPI: "OFF"
-      WALBERLA_BUILD_WITH_OPENMP: "OFF"
-      WALBERLA_BUILD_WITH_PARMETIS: "OFF"
-   only:
-      variables:
-         - $ENABLE_NIGHTLY_BUILDS
-   tags:
-      - cuda
-      - docker
-
-clang_9.0_mpionly:
-   extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:9.0
-   variables:
-      WALBERLA_BUILD_WITH_CUDA: "ON"
-      WALBERLA_BUILD_WITH_OPENMP: "OFF"
-   only:
-      variables:
-         - $ENABLE_NIGHTLY_BUILDS
-   tags:
-      - cuda
-      - docker
-
-clang_9.0_hybrid:
-   extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:9.0
-   variables:
-      WALBERLA_BUILD_WITH_CUDA: "ON"
-   only:
-      variables:
-         - $ENABLE_NIGHTLY_BUILDS
-   tags:
-      - cuda
-      - docker
-
-clang_9.0_serial_dbg:
-   extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:9.0
-   variables:
-      WALBERLA_BUILD_WITH_CUDA: "ON"
-      WALBERLA_BUILD_WITH_MPI: "OFF"
-      WALBERLA_BUILD_WITH_OPENMP: "OFF"
-      WALBERLA_BUILD_WITH_PARMETIS: "OFF"
-      CMAKE_BUILD_TYPE: "DebugOptimized"
-   only:
-      variables:
-         - $ENABLE_NIGHTLY_BUILDS
-   tags:
-      - cuda
-      - docker
-
-clang_9.0_mpionly_dbg:
-   extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:9.0
-   variables:
-      WALBERLA_BUILD_WITH_CUDA: "ON"
-      CMAKE_BUILD_TYPE: "DebugOptimized"
-      WALBERLA_BUILD_WITH_OPENMP: "OFF"
-   only:
-      variables:
-         - $ENABLE_NIGHTLY_BUILDS
-   tags:
-      - cuda
-      - docker
-
-clang_9.0_hybrid_dbg:
-   extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:9.0
-   variables:
-      WALBERLA_BUILD_WITH_CUDA: "ON"
-      CMAKE_BUILD_TYPE: "DebugOptimized"
-   tags:
-      - cuda
-      - docker
-
-clang_9.0_hybrid_dbg_sp:
-   extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:9.0
-   variables:
-      WALBERLA_BUILD_WITH_CUDA: "ON"
-      CMAKE_BUILD_TYPE: "DebugOptimized"
-      WALBERLA_DOUBLE_ACCURACY: "OFF"
-      WALBERLA_BUILD_WITH_PARMETIS: "OFF"
-      WALBERLA_BUILD_WITH_METIS: "OFF"
-   only:
-      variables:
-         - $ENABLE_NIGHTLY_BUILDS
-   tags:
-      - cuda
-      - docker
-
-clang_10.0_serial:
-   extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:10.0
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-12
+   before_script:
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - cd python
+      - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
+      - pip3 list
+      - cd ..
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_MPI: "OFF"
       WALBERLA_BUILD_WITH_OPENMP: "OFF"
       WALBERLA_BUILD_WITH_PARMETIS: "OFF"
+      WALBERLA_BUILD_WITH_CODEGEN: "ON"
+      WALBERLA_BUILD_WITH_PYTHON: "ON"
    only:
       variables:
          - $ENABLE_NIGHTLY_BUILDS
    tags:
-      - cuda
+      - cuda11
       - docker
 
-clang_10.0_mpionly:
+clang_12_mpionly:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:10.0
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-12
+   before_script:
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - cd python
+      - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
+      - pip3 list
+      - cd ..
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_OPENMP: "OFF"
+      WALBERLA_BUILD_WITH_CODEGEN: "ON"
+      WALBERLA_BUILD_WITH_PYTHON: "ON"
    only:
       variables:
          - $ENABLE_NIGHTLY_BUILDS
    tags:
-      - cuda
+      - cuda11
       - docker
 
-clang_10.0_hybrid:
+clang_12_hybrid:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:10.0
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-12
+   before_script:
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - cd python
+      - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
+      - pip3 list
+      - cd ..
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
+      WALBERLA_BUILD_WITH_CODEGEN: "ON"
+      WALBERLA_BUILD_WITH_PYTHON: "ON"
    only:
       variables:
          - $ENABLE_NIGHTLY_BUILDS
    tags:
-      - cuda
+      - cuda11
       - docker
 
-clang_10.0_serial_dbg:
+clang_12_serial_dbg:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:10.0
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-12
+   before_script:
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - cd python
+      - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
+      - pip3 list
+      - cd ..
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_MPI: "OFF"
       WALBERLA_BUILD_WITH_OPENMP: "OFF"
       WALBERLA_BUILD_WITH_PARMETIS: "OFF"
       CMAKE_BUILD_TYPE: "DebugOptimized"
+      WALBERLA_BUILD_WITH_CODEGEN: "ON"
+      WALBERLA_BUILD_WITH_PYTHON: "ON"
    only:
       variables:
          - $ENABLE_NIGHTLY_BUILDS
    tags:
-      - cuda
+      - cuda11
       - docker
 
-clang_10.0_mpionly_dbg:
+clang_12_mpionly_dbg:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:10.0
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-12
+   before_script:
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - cd python
+      - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
+      - pip3 list
+      - cd ..
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
       WALBERLA_BUILD_WITH_OPENMP: "OFF"
+      WALBERLA_BUILD_WITH_CODEGEN: "ON"
+      WALBERLA_BUILD_WITH_PYTHON: "ON"
    only:
       variables:
          - $ENABLE_NIGHTLY_BUILDS
    tags:
-      - cuda
+      - cuda11
       - docker
 
-clang_10.0_hybrid_dbg:
+clang_12_hybrid_dbg:  # no job-level `only:` filter: not gated on $ENABLE_NIGHTLY_BUILDS like its siblings
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:10.0
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-12
+   before_script:
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - cd python
+      - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
+      - pip3 list
+      - cd ..
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
-   only:
-      variables:
-         - $ENABLE_NIGHTLY_BUILDS
+      WALBERLA_BUILD_WITH_CODEGEN: "ON"
+      WALBERLA_BUILD_WITH_PYTHON: "ON"
    tags:
-      - cuda
+      - cuda11
       - docker
 
-clang_10.0_hybrid_dbg_sp:
+clang_12_hybrid_dbg_sp:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:10.0
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-12
+   before_script:
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - cd python
+      - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
+      - pip3 list
+      - cd ..
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
       WALBERLA_DOUBLE_ACCURACY: "OFF"
       WALBERLA_BUILD_WITH_PARMETIS: "OFF"
       WALBERLA_BUILD_WITH_METIS: "OFF"
+      WALBERLA_BUILD_WITH_CODEGEN: "ON"
+      WALBERLA_BUILD_WITH_PYTHON: "ON"
    only:
       variables:
          - $ENABLE_NIGHTLY_BUILDS
    tags:
-      - cuda
+      - cuda11
       - docker
 
-clang_11.0_serial:
+clang_13_serial:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:11.0
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-13
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1363,11 +1020,11 @@ clang_11.0_serial:
       - cuda11
       - docker
 
-clang_11.0_mpionly:
+clang_13_mpionly:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:11.0
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-13
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1385,11 +1042,11 @@ clang_11.0_mpionly:
       - cuda11
       - docker
 
-clang_11.0_hybrid:
+clang_13_hybrid:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:11.0
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-13
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1406,11 +1063,11 @@ clang_11.0_hybrid:
       - cuda11
       - docker
 
-clang_11.0_serial_dbg:
+clang_13_serial_dbg:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:11.0
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-13
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1431,11 +1088,11 @@ clang_11.0_serial_dbg:
       - cuda11
       - docker
 
-clang_11.0_mpionly_dbg:
+clang_13_mpionly_dbg:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:11.0
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-13
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1454,11 +1111,11 @@ clang_11.0_mpionly_dbg:
       - cuda11
       - docker
 
-clang_11.0_hybrid_dbg:
+clang_13_hybrid_dbg:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:11.0
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-13
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1476,11 +1133,11 @@ clang_11.0_hybrid_dbg:
       - cuda11
       - docker
 
-clang_11.0_hybrid_dbg_sp:
+clang_13_hybrid_dbg_sp:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:11.0
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-13
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1501,11 +1158,11 @@ clang_11.0_hybrid_dbg_sp:
       - cuda11
       - docker
 
-clang_12.0_serial:
+clang_14_serial:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:12.0
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-14
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1525,11 +1182,11 @@ clang_12.0_serial:
       - cuda11
       - docker
 
-clang_12.0_mpionly:
+clang_14_mpionly:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:12.0
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-14
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1547,11 +1204,11 @@ clang_12.0_mpionly:
       - cuda11
       - docker
 
-clang_12.0_hybrid:
+clang_14_hybrid:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:12.0
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-14
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1568,11 +1225,11 @@ clang_12.0_hybrid:
       - cuda11
       - docker
 
-clang_12.0_serial_dbg:
+clang_14_serial_dbg:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:12.0
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-14
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1593,11 +1250,11 @@ clang_12.0_serial_dbg:
       - cuda11
       - docker
 
-clang_12.0_mpionly_dbg:
+clang_14_mpionly_dbg:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:12.0
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-14
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1616,11 +1273,11 @@ clang_12.0_mpionly_dbg:
       - cuda11
       - docker
 
-clang_12.0_hybrid_dbg:
+clang_14_hybrid_dbg:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:12.0
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-14
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1638,11 +1295,11 @@ clang_12.0_hybrid_dbg:
       - cuda11
       - docker
 
-clang_12.0_hybrid_dbg_sp:
+clang_14_hybrid_dbg_sp:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:12.0
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-14
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1663,11 +1320,11 @@ clang_12.0_hybrid_dbg_sp:
       - cuda11
       - docker
 
-clang_13.0_serial:
+clang_15_serial:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:13.0
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-15
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1687,11 +1344,11 @@ clang_13.0_serial:
       - cuda11
       - docker
 
-clang_13.0_mpionly:
+clang_15_mpionly:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:13.0
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-15
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1709,11 +1366,11 @@ clang_13.0_mpionly:
       - cuda11
       - docker
 
-clang_13.0_hybrid:
+clang_15_hybrid:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:13.0
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-15
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1727,11 +1384,11 @@ clang_13.0_hybrid:
       - cuda11
       - docker
 
-clang_13.0_serial_dbg:
+clang_15_serial_dbg:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:13.0
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-15
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1749,11 +1406,11 @@ clang_13.0_serial_dbg:
       - cuda11
       - docker
 
-clang_13.0_mpionly_dbg:
+clang_15_mpionly_dbg:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:13.0
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-15
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1769,11 +1426,11 @@ clang_13.0_mpionly_dbg:
       - cuda11
       - docker
 
-clang_13.0_hybrid_dbg:
+clang_15_hybrid_dbg:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:13.0
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-15
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1788,12 +1445,12 @@ clang_13.0_hybrid_dbg:
       - cuda11
       - docker
 
-clang_13.0_hybrid_dbg_sp:
+clang_15_hybrid_dbg_sp:
    extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:13.0
+   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-15
    stage: pretest
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1811,83 +1468,6 @@ clang_13.0_hybrid_dbg_sp:
       - cuda11
       - docker
 
-inteloneapi_22.3_serial:
-   extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/inteloneapi:22.3
-   variables:
-      WALBERLA_BUILD_WITH_CUDA: "OFF"
-      WALBERLA_BUILD_WITH_MPI: "OFF"
-      WALBERLA_BUILD_WITH_OPENMP: "OFF"
-      WALBERLA_BUILD_WITH_PARMETIS: "OFF"
-   only:
-      variables:
-         - $ENABLE_NIGHTLY_BUILDS
-   tags:
-      - docker
-
-inteloneapi_22.3_mpionly:
-   extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/inteloneapi:22.3
-   variables:
-      WALBERLA_BUILD_WITH_CUDA: "OFF"
-      WALBERLA_BUILD_WITH_OPENMP: "OFF"
-   only:
-      variables:
-         - $ENABLE_NIGHTLY_BUILDS
-   tags:
-      - docker
-
-inteloneapi_22.3_hybrid:
-   extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/inteloneapi:22.3
-   variables:
-      WALBERLA_BUILD_WITH_CUDA: "OFF"
-   tags:
-      - docker
-
-inteloneapi_22.3_serial_dbg:
-   extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/inteloneapi:22.3
-   variables:
-      WALBERLA_BUILD_WITH_CUDA: "OFF"
-      WALBERLA_BUILD_WITH_MPI: "OFF"
-      WALBERLA_BUILD_WITH_OPENMP: "OFF"
-      WALBERLA_BUILD_WITH_PARMETIS: "OFF"
-      CMAKE_BUILD_TYPE: "DebugOptimized"
-   tags:
-      - docker
-
-inteloneapi_22.3_mpionly_dbg:
-   extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/inteloneapi:22.3
-   variables:
-      WALBERLA_BUILD_WITH_CUDA: "OFF"
-      CMAKE_BUILD_TYPE: "DebugOptimized"
-      WALBERLA_BUILD_WITH_OPENMP: "OFF"
-   tags:
-      - docker
-
-inteloneapi_22.3_hybrid_dbg:
-   extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/inteloneapi:22.3
-   variables:
-      WALBERLA_BUILD_WITH_CUDA: "OFF"
-      CMAKE_BUILD_TYPE: "DebugOptimized"
-   tags:
-      - docker
-
-inteloneapi_22.3_hybrid_dbg_sp:
-   extends: .build_template
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/inteloneapi:22.3
-   variables:
-      WALBERLA_BUILD_WITH_CUDA: "OFF"
-      CMAKE_BUILD_TYPE: "DebugOptimized"
-      WALBERLA_DOUBLE_ACCURACY: "OFF"
-      WALBERLA_BUILD_WITH_PARMETIS: "OFF"
-      WALBERLA_BUILD_WITH_METIS: "OFF"
-   tags:
-      - docker
-
 
 
 gcc_8_hybrid_dbg_noboost:
@@ -1966,23 +1546,24 @@ doc:
 ##                                                                           ##
 ###############################################################################
 
-clang-tidy:
-   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:11.0
-   script:
-      - $CXX --version
-      - clang-tidy -version
-      - cmake --version
-      - mkdir $CI_PROJECT_DIR/build
-      - cd $CI_PROJECT_DIR/build
-      - cmake .. -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DWALBERLA_BUFFER_DEBUG=ON -DWALBERLA_BUILD_TESTS=ON -DWALBERLA_BUILD_BENCHMARKS=ON -DWALBERLA_BUILD_TUTORIALS=ON -DWALBERLA_BUILD_TOOLS=ON -DWALBERLA_BUILD_WITH_MPI=ON -DWALBERLA_BUILD_WITH_OPENMP=ON -DCMAKE_BUILD_TYPE=Debug -DWALBERLA_BUILD_WITH_METIS=ON -DWALBERLA_BUILD_WITH_PARMETIS=ON -DWALBERLA_BUILD_WITH_OPENMESH=ON -DWALBERLA_DOUBLE_ACCURACY=ON -DWALBERLA_LOGLEVEL=DETAIL
-      - cmake . -LA
-      - utilities/filterCompileCommands.py compile_commands.json
-      - run-clang-tidy.py -quiet | tee clang-tidy-output.txt
-   artifacts:
-      paths:
-         - $CI_PROJECT_DIR/build/clang-tidy-output.txt
-   tags:
-      - docker
+#clang-tidy:
+#   image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-15
+#   script:
+#      - $CXX --version
+#      - clang-tidy -version
+#      - cmake --version
+#      - mkdir $CI_PROJECT_DIR/build
+#      - cd $CI_PROJECT_DIR/build
+#      - cmake .. -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DWALBERLA_BUFFER_DEBUG=ON -DWALBERLA_BUILD_TESTS=ON -DWALBERLA_BUILD_BENCHMARKS=ON -DWALBERLA_BUILD_TUTORIALS=ON -DWALBERLA_BUILD_TOOLS=ON -DWALBERLA_BUILD_WITH_MPI=ON -DWALBERLA_BUILD_WITH_OPENMP=ON -DCMAKE_BUILD_TYPE=Debug -DWALBERLA_BUILD_WITH_METIS=ON -DWALBERLA_BUILD_WITH_PARMETIS=ON -DWALBERLA_BUILD_WITH_OPENMESH=ON -DWALBERLA_DOUBLE_ACCURACY=ON -DWALBERLA_LOGLEVEL=DETAIL
+#      - cmake . -LA
+#      - utilities/filterCompileCommands.py compile_commands.json
+#      - wget https://raw.githubusercontent.com/llvm/llvm-project/main/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py
+#      - python3 run-clang-tidy.py -quiet | tee clang-tidy-output.txt
+#   artifacts:
+#      paths:
+#         - $CI_PROJECT_DIR/build/clang-tidy-output.txt
+#   tags:
+#      - docker
 
 
 cppcheck:
@@ -2012,7 +1593,7 @@ coverage:
       - mkdir build
       - cd build
       - if dpkg --compare-versions `ompi_info | head -2 | tail -1 | sed 's/[^0-9.]*\([0-9.]*\).*/\1/'` ge 1.10; then export MPIEXEC_PREFLAGS="--allow-run-as-root" ; fi
-      - cmake .. -DWALBERLA_BUILD_TESTS=ON -DWALBERLA_BUILD_BENCHMARKS=ON -DWALBERLA_BUILD_TUTORIALS=ON -DWALBERLA_BUILD_WITH_MPI=ON -DWALBERLA_BUILD_WITH_OPENMP=OFF -DCMAKE_BUILD_TYPE=DebugOptimized -DMPIEXEC_PREFLAGS=$MPIEXEC_PREFLAGS -DWALBERLA_BUILD_WITH_CODEGEN=OFF -DWALBERLA_BUILD_WITH_GCOV=ON -DWALBERLA_LOGLEVEL=DETAIL
+      - cmake .. -DWALBERLA_BUILD_TESTS=ON -DWALBERLA_BUILD_BENCHMARKS=ON -DWALBERLA_BUILD_TUTORIALS=ON -DWALBERLA_BUILD_WITH_MPI=ON -DWALBERLA_BUILD_WITH_OPENMP=OFF -DCMAKE_BUILD_TYPE=DebugOptimized -DMPIEXEC_PREFLAGS=$MPIEXEC_PREFLAGS -DWALBERLA_BUILD_WITH_CODEGEN=OFF -DWALBERLA_BUILD_WITH_GCOV=ON  -DWALBERLA_LOGLEVEL=DETAIL
       - cmake . -LA
       - make -j $NUM_BUILD_CORES -l $NUM_CORES
       - ctest -LE longrun --output-on-failure -j $NUM_CORES --timeout 3000
@@ -2072,6 +1653,9 @@ coverage:
 
 mac_Serial_Dbg:
    extends: .mac_build_template
+   before_script:  # lbmpy pin matches LBMPY_MIN_VERSION in CMakeLists.txt
+      - pip3 install pystencils==1.3.2
+      - pip3 install lbmpy==1.3.2
    variables:
       CMAKE_BUILD_TYPE: "DebugOptimized"
       CTEST_EXCLUDE_LABELS: "longrun"
@@ -2082,6 +1666,9 @@ mac_Serial_Dbg:
 
 mac_Serial:
    extends: .mac_build_template
+   before_script:  # lbmpy pin matches LBMPY_MIN_VERSION in CMakeLists.txt
+      - pip3 install pystencils==1.3.2
+      - pip3 install lbmpy==1.3.2
    variables:
       CMAKE_BUILD_TYPE: "Release"
       CTEST_EXCLUDE_LABELS: "longrun"
@@ -2092,6 +1679,9 @@ mac_Serial:
 
 mac_MpiOnly_Dbg:
    extends: .mac_build_template
+   before_script:  # lbmpy pin matches LBMPY_MIN_VERSION in CMakeLists.txt
+      - pip3 install pystencils==1.3.2
+      - pip3 install lbmpy==1.3.2
    variables:
       CMAKE_BUILD_TYPE: "DebugOptimized"
       CTEST_EXCLUDE_LABELS: "longrun"
@@ -2103,6 +1693,9 @@ mac_MpiOnly_Dbg:
 
 mac_MpiOnly:
    extends: .mac_build_template
+   before_script:  # lbmpy pin matches LBMPY_MIN_VERSION in CMakeLists.txt
+      - pip3 install pystencils==1.3.2
+      - pip3 install lbmpy==1.3.2
    variables:
       CMAKE_BUILD_TYPE: "Release"
       CTEST_EXCLUDE_LABELS: "longrun"
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f23b17c27810f9ac3ce97e7798d77ec3133ceea7..2eaeab744481977c0554afe95ca6b03d48606306 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -403,6 +403,8 @@ if( WALBERLA_CXX_COMPILER_IS_INTEL )
    # system headers are also supported by intel, but cmake does not recognize that
    set( CMAKE_INCLUDE_SYSTEM_FLAG_CXX "-isystem " )
    add_flag ( CMAKE_CXX_FLAGS "-wd2928,2504,2259,1682,597" )
+   # suppress diagnostic 10441, the deprecation notice emitted by the classic icc/icpc compilers
+   add_flag ( CMAKE_CXX_FLAGS "-diag-disable=10441" )
 elseif( WALBERLA_CXX_COMPILER_IS_GNU )
    add_flag ( CMAKE_CXX_FLAGS "-Wfloat-equal -Wextra" )
 elseif( WALBERLA_CXX_COMPILER_IS_NEC )
@@ -615,7 +617,7 @@ endif ()
 ##
 #############################################################################################################################
 if ( WALBERLA_BUILD_WITH_CODEGEN )
-   set(LBMPY_MIN_VERSION 1.2)
+   set(LBMPY_MIN_VERSION 1.3.2)
    execute_process(COMMAND ${Python_EXECUTABLE} -c "import lbmpy; print(lbmpy.__version__)"
          RESULT_VARIABLE LBMPY_FOUND OUTPUT_VARIABLE LBMPY_VERSION)
     if(NOT LBMPY_FOUND EQUAL 0)
diff --git a/extern/pybind11 b/extern/pybind11
index 8b03ffa7c06cd9c8a38297b1c8923695d1ff1b07..f7b499615e14d70ab098a20deb0cdb3889998a1a 160000
--- a/extern/pybind11
+++ b/extern/pybind11
@@ -1 +1 @@
-Subproject commit 8b03ffa7c06cd9c8a38297b1c8923695d1ff1b07
+Subproject commit f7b499615e14d70ab098a20deb0cdb3889998a1a
diff --git a/python/lbmpy_walberla/additional_data_handler.py b/python/lbmpy_walberla/additional_data_handler.py
index 692d0cf57d3e905537d3cbd1af445c26c64074bc..16e5cb35f8ed64ae77eeda06c08812bbb57e950a 100644
--- a/python/lbmpy_walberla/additional_data_handler.py
+++ b/python/lbmpy_walberla/additional_data_handler.py
@@ -1,7 +1,8 @@
 from pystencils import Target
 from pystencils.stencil import inverse_direction
+from pystencils.typing import BasicType
 
-from lbmpy.advanced_streaming import AccessPdfValues, numeric_offsets, numeric_index
+from lbmpy.advanced_streaming import AccessPdfValues, numeric_offsets, numeric_index, Timestep, is_inplace
 from lbmpy.advanced_streaming.indexing import MirroredStencilDirections
 from lbmpy.boundaries.boundaryconditions import LbBoundary
 from lbmpy.boundaries import ExtrapolationOutflow, FreeSlip, UBB
@@ -26,25 +27,8 @@ def default_additional_data_handler(boundary_obj: LbBoundary, lb_method, field_n
 class FreeSlipAdditionalDataHandler(AdditionalDataHandler):
     def __init__(self, stencil, boundary_object):
         assert isinstance(boundary_object, FreeSlip)
-        self._boundary_object = boundary_object
         super(FreeSlipAdditionalDataHandler, self).__init__(stencil=stencil)
 
-    @property
-    def constructor_arguments(self):
-        return ""
-
-    @property
-    def initialiser_list(self):
-        return ""
-
-    @property
-    def additional_arguments_for_fill_function(self):
-        return ""
-
-    @property
-    def additional_parameters_for_fill_function(self):
-        return ""
-
     def data_initialisation(self, direction):
         def array_pattern(dtype, name, content):
             return f"const {str(dtype)} {name} [] = {{ {','.join(str(c) for c in content)} }};"
@@ -102,21 +86,20 @@ class FreeSlipAdditionalDataHandler(AdditionalDataHandler):
 
         return "\n".join(init_list)
 
-    @property
-    def additional_member_variable(self):
-        return ""
-
 
 class UBBAdditionalDataHandler(AdditionalDataHandler):
     def __init__(self, stencil, boundary_object):
         assert isinstance(boundary_object, UBB)
-        self._boundary_object = boundary_object
         super(UBBAdditionalDataHandler, self).__init__(stencil=stencil)
 
+    @property
+    def constructor_argument_name(self):
+        return "velocityCallback"
+
     @property
     def constructor_arguments(self):
-        return ", std::function<Vector3<real_t>(const Cell &, const shared_ptr<StructuredBlockForest>&, IBlock&)>& " \
-               "velocityCallback "
+        return ", std::function<Vector3<real_t>(const Cell &, const shared_ptr<StructuredBlockForest>&, IBlock&)>& " \
+               f"{self.constructor_argument_name} "  # only this literal interpolates; the result keeps its leading ", "
 
     @property
     def initialiser_list(self):
@@ -146,19 +129,34 @@ class UBBAdditionalDataHandler(AdditionalDataHandler):
 
 
 class OutflowAdditionalDataHandler(AdditionalDataHandler):
-    def __init__(self, stencil, boundary_object, target=Target.CPU, field_name='pdfs'):
+    def __init__(self, stencil, boundary_object, target=Target.CPU, field_name='pdfs', pdfs_data_type=None, zeroth_timestep=None):
         assert isinstance(boundary_object, ExtrapolationOutflow)
-        self._boundary_object = boundary_object
         self._stencil = boundary_object.stencil
         self._lb_method = boundary_object.lb_method
         self._normal_direction = boundary_object.normal_direction
         self._field_name = field_name
         self._target = target
+        self._dtype = BasicType(boundary_object.data_type).c_name  # C type name wrapped around field reads
+        if pdfs_data_type is None:
+            self._pdfs_data_type = "real_t"  # default element type of the pdf GhostLayerField
+        else:
+            pdfs_data_type = BasicType(pdfs_data_type)
+            self._pdfs_data_type = pdfs_data_type.c_name
+
+        self._streaming_pattern = boundary_object.streaming_pattern
+        if zeroth_timestep is not None:  # test for None, not truthiness: a zero-valued Timestep is still explicit
+            self._zeroth_timestep = zeroth_timestep
+        else:
+            self._zeroth_timestep = Timestep.EVEN if is_inplace(self._streaming_pattern) else Timestep.BOTH
         super(OutflowAdditionalDataHandler, self).__init__(stencil=stencil)
 
         assert sum([a != 0 for a in self._normal_direction]) == 1, \
             "The outflow boundary is only implemented for straight walls at the moment."
 
+    @property
+    def constructor_argument_name(self):
+        return f"{self._field_name}CPUID_" if self._target == Target.GPU else ""
+
     @property
     def constructor_arguments(self):
         return f", BlockDataID {self._field_name}CPUID_" if self._target == Target.GPU else ""
@@ -170,18 +168,18 @@ class OutflowAdditionalDataHandler(AdditionalDataHandler):
     @property
     def additional_field_data(self):
         identifier = "CPU" if self._target == Target.GPU else ""
-        return f"auto {self._field_name} = block->getData< field::GhostLayerField<real_t, " \
+        return f"auto {self._field_name} = block->getData< field::GhostLayerField<{self._pdfs_data_type}, " \
                f"{len(self._stencil)}> >({self._field_name}{identifier}ID); "
 
     def data_initialisation(self, direction_index):
-        pdf_acc = AccessPdfValues(self._boundary_object.stencil,
-                                  streaming_pattern=self._boundary_object.streaming_pattern,
-                                  timestep=self._boundary_object.zeroth_timestep,
+        pdf_acc = AccessPdfValues(self._stencil,  # stencil, pattern and timestep are cached on self in __init__
+                                  streaming_pattern=self._streaming_pattern,
+                                  timestep=self._zeroth_timestep,  # explicit argument or the fallback from __init__
                                   streaming_dir='out')
 
         init_list = []
         for key, value in self.get_init_dict(pdf_acc, direction_index).items():
-            init_list.append(f"element.{key} = {self._field_name}->get({value});")
+            init_list.append(f"element.{key} = {self._dtype}( {self._field_name}->get({value}) );")
 
         return "\n".join(init_list)
 
diff --git a/python/lbmpy_walberla/boundary_collection.py b/python/lbmpy_walberla/boundary_collection.py
index 17bfa245a3212404c35dd06c420fcb19a55c3049..082567204acf47b97a1344de8b1ae38288ded567 100644
--- a/python/lbmpy_walberla/boundary_collection.py
+++ b/python/lbmpy_walberla/boundary_collection.py
@@ -15,8 +15,9 @@ from pystencils import Target
 import numpy as np
 
 
-def lbm_boundary_generator(class_name: str, flag_uid: str, boundary_object: LbBoundary, additional_data_handler=None):
-    def generation_function(ctx, lb_method, field_name='pdfs',
+def lbm_boundary_generator(class_name: str, flag_uid: str, boundary_object: LbBoundary, additional_data_handler=None,
+                           field_data_type=None):
+    def generation_function(ctx, lb_method, field_name='pdfs', spatial_shape=None,
                             streaming_pattern='pull', after_collision=True,
                             namespace='lbm',
                             **create_kernel_params):
@@ -25,6 +26,8 @@ def lbm_boundary_generator(class_name: str, flag_uid: str, boundary_object: LbBo
                                                       boundary_object=boundary_object,
                                                       lb_method=lb_method,
                                                       field_name=field_name,
+                                                      spatial_shape=spatial_shape,
+                                                      field_data_type=field_data_type,
                                                       streaming_pattern=streaming_pattern,
                                                       after_collision=after_collision,
                                                       additional_data_handler=additional_data_handler,
@@ -41,6 +44,7 @@ def generate_boundary_collection(generation_context,
                                  boundary_generators,
                                  lb_method,
                                  field_name='pdfs',
+                                 spatial_shape=None,
                                  streaming_pattern='pull',
                                  prev_timestep=Timestep.BOTH,
                                  namespace='lbm',
@@ -49,22 +53,26 @@ def generate_boundary_collection(generation_context,
     kernel_list = []
     includes = []
     boundary_classes = []
+    additional_data_handlers = []
     flag_uids = []
     object_names = []
     targets = []
 
     for boundary_generator in boundary_generators:
         boundary_functor = boundary_generator['generator']
-        context = boundary_functor(generation_context, lb_method, field_name, streaming_pattern, prev_timestep,
-                                   namespace, **create_kernel_params)
+        context = boundary_functor(generation_context, lb_method, field_name, spatial_shape,
+                                   streaming_pattern, prev_timestep, namespace, **create_kernel_params)
 
         kernel_list.append(context['kernel'])
         includes.append(f"\"{context['class_name']}.h\"")
         boundary_classes.append(f"{context['namespace']}::{context['class_name']}")
+        additional_data_handlers.append(context['additional_data_handler'])
         flag_uids.append(boundary_generator['flag_id'])
         object_names.append(f"{context['class_name']}Object")
         targets.append(f"{context['target']}")
 
+    additional_constructor_arguments = [a.constructor_arguments[2:] for a in additional_data_handlers]
+
     assert len(set(targets)) == 1
     target = targets[0]
 
@@ -75,6 +83,8 @@ def generate_boundary_collection(generation_context,
         'namespace': namespace,
         'includes': includes,
         'boundary_classes': boundary_classes,
+        'additional_data_handlers': additional_data_handlers,
+        'additional_constructor_arguments': additional_constructor_arguments,
         'flag_uids': flag_uids,
         'object_names': object_names
     }
@@ -94,6 +104,8 @@ def __generate_alternating_lbm_boundary(generation_context,
                                         boundary_object,
                                         lb_method,
                                         field_name='pdfs',
+                                        spatial_shape=None,
+                                        field_data_type=None,
                                         streaming_pattern='pull',
                                         after_collision=True,
                                         additional_data_handler=None,
@@ -132,13 +144,14 @@ def __generate_alternating_lbm_boundary(generation_context,
             return OddIntegerCondition(timestep_param_name, kernel_even, kernel_odd, timestep_param_dtype)
 
     timestep_advancement = {"field_name": field_name, "function": "getTimestep"}
-
     context = pystencils_walberla.boundary.generate_boundary(generation_context,
-                                                             class_name,
-                                                             boundary_object,
+                                                             class_name=class_name,
+                                                             boundary_object=boundary_object,
                                                              field_name=field_name,
                                                              neighbor_stencil=lb_method.stencil,
                                                              index_shape=[lb_method.stencil.Q],
+                                                             spatial_shape=spatial_shape,
+                                                             field_data_type=field_data_type,
                                                              kernel_creation_function=boundary_creation_function,
                                                              namespace=namespace,
                                                              additional_data_handler=additional_data_handler,
diff --git a/python/lbmpy_walberla/packing_kernels.py b/python/lbmpy_walberla/packing_kernels.py
index 53e5d877e14f5584c1e2eb7738fe018752998485..8a8728031cd4eb56ba4366e25ef6fd85b1f2e1b5 100644
--- a/python/lbmpy_walberla/packing_kernels.py
+++ b/python/lbmpy_walberla/packing_kernels.py
@@ -83,7 +83,8 @@ def generate_packing_kernels(generation_context: CodeGenerationContext, class_na
 
 class PackingKernelsCodegen:
 
-    def __init__(self, stencil, streaming_pattern, class_name, config: CreateKernelConfig):
+    def __init__(self, stencil, streaming_pattern, class_name, config: CreateKernelConfig,
+                 src_field=None, dst_field=None):
         self.stencil = stencil
         self.dim = stencil.D
         self.values_per_cell = stencil.Q
@@ -94,10 +95,11 @@ class PackingKernelsCodegen:
         self.config = config
         self.data_type = config.data_type['pdfs'].numpy_dtype
 
-        self.src_field, self.dst_field = fields(
-            f'pdfs_src({self.values_per_cell}), pdfs_dst({self.values_per_cell}) :{self.data_type}[{self.dim}D]')
+        self.src_field = src_field if src_field else fields(f'pdfs_src({stencil.Q}) :{self.data_type}[{stencil.D}D]')
+        self.dst_field = dst_field if dst_field else fields(f'pdfs_dst({stencil.Q}) :{self.data_type}[{stencil.D}D]')
+
         self.accessors = [get_accessor(streaming_pattern, t) for t in get_timesteps(streaming_pattern)]
-        self.mask_field = fields(f'mask : uint32 [{self.dim}D]')
+        self.mask_field = fields(f'mask : uint32 [{self.dim}D]', layout=self.src_field.layout)  # arg may be None
 
     def create_uniform_kernel_families(self, kernels_dict=None):
         kernels = dict() if kernels_dict is None else kernels_dict
diff --git a/python/lbmpy_walberla/storage_specification.py b/python/lbmpy_walberla/storage_specification.py
index d3eaec45797a40046d4ba273487c580274865878..dd74fe8d0202039fc03630004127c95d3a9f3bb2 100644
--- a/python/lbmpy_walberla/storage_specification.py
+++ b/python/lbmpy_walberla/storage_specification.py
@@ -4,10 +4,10 @@ from dataclasses import replace
 from jinja2 import Environment, PackageLoader, StrictUndefined
 import numpy as np
 
-from pystencils import Target
+from pystencils import fields, Target
 
-from lbmpy import LBMConfig
-from lbmpy.advanced_streaming import is_inplace
+from lbmpy import LBMConfig, LBMOptimisation
+from lbmpy.advanced_streaming import is_inplace, get_accessor, Timestep
 from lbmpy.methods import AbstractLbMethod
 
 from pystencils_walberla.cmake_integration import CodeGenerationContext
@@ -15,11 +15,15 @@ from pystencils_walberla.jinja_filters import add_pystencils_filters_to_jinja_en
 from pystencils_walberla.utility import config_from_context
 from pystencils_walberla.instrumentation import likwid_wrap_family
 from lbmpy_walberla.packing_kernels import PackingKernelsCodegen
+from lbmpy_walberla.utility import create_pdf_field
 
 
 def generate_lbm_storage_specification(generation_context: CodeGenerationContext, class_name: str,
-                                       method: AbstractLbMethod, lbm_config: LBMConfig, nonuniform: bool = False,
-                                       target: Target = Target.CPU, data_type=None, cpu_openmp: bool = False,
+                                       method: AbstractLbMethod,
+                                       lbm_config: LBMConfig, lbm_optimisation: LBMOptimisation,
+                                       nonuniform: bool = False,
+                                       target: Target = Target.CPU,
+                                       data_type=None, cpu_openmp: bool = False,
                                        **create_kernel_params):
     namespace = "lbm"
     stencil = method.stencil
@@ -33,10 +37,32 @@ def generate_lbm_storage_specification(generation_context: CodeGenerationContext
     config = replace(config, cpu_vectorize_info=None)
 
     default_dtype = config.data_type.default_factory()
-    is_float = True if issubclass(default_dtype.numpy_dtype.type, np.float32) else False
-    constant_suffix = "f" if is_float else ""
-
-    cg = PackingKernelsCodegen(stencil, streaming_pattern, class_name, config)
+    if issubclass(default_dtype.numpy_dtype.type, np.float64):
+        data_type_string = "double"
+    elif issubclass(default_dtype.numpy_dtype.type, np.float32):
+        data_type_string = "float"
+    elif issubclass(default_dtype.numpy_dtype.type, np.float16):
+        data_type_string = "half"
+    else:
+        raise ValueError(f"default datatype {default_dtype.numpy_dtype.type} is not supported. "
+                         f"Supported are only np.float64, np.float32 and np.float16")
+
+    symbolic_field = lbm_optimisation.symbolic_field
+    if not symbolic_field:
+        symbolic_field = create_pdf_field(config=config, name="pdfs_src", stencil=stencil,
+                                          field_layout=lbm_optimisation.field_layout)
+
+    if is_inplace(streaming_pattern):
+        symbolic_temporary_field = create_pdf_field(config=config, name="pdfs_dst", stencil=stencil,
+                                                    field_layout=lbm_optimisation.field_layout)
+    else:
+        symbolic_temporary_field = lbm_optimisation.symbolic_temporary_field
+        if not symbolic_temporary_field:
+            symbolic_temporary_field = create_pdf_field(config=config, name="pdfs_dst", stencil=stencil,
+                                                        field_layout=lbm_optimisation.field_layout)
+
+    cg = PackingKernelsCodegen(stencil, streaming_pattern, class_name, config,
+                               src_field=symbolic_field, dst_field=symbolic_temporary_field)
     kernels = cg.create_uniform_kernel_families()
 
     if nonuniform:
@@ -55,6 +81,16 @@ def generate_lbm_storage_specification(generation_context: CodeGenerationContext
     cqc = method.conserved_quantity_computation
     equilibrium = method.equilibrium_distribution
 
+    f = fields(f"f({stencil.Q}): double[{stencil.D}D]", layout='fzyx')
+    even_accessor = get_accessor(streaming_pattern, Timestep.EVEN)
+    odd_accessor = get_accessor(streaming_pattern, Timestep.ODD)
+
+    even_read = even_accessor.read(f, stencil)
+    even_write = even_accessor.write(f, stencil)
+
+    odd_read = odd_accessor.read(f, stencil)
+    odd_write = odd_accessor.write(f, stencil)
+
     jinja_context = {
         'class_name': class_name,
         'namespace': namespace,
@@ -67,12 +103,17 @@ def generate_lbm_storage_specification(generation_context: CodeGenerationContext
         'equilibrium_deviation_only': equilibrium.deviation_only,
         'inplace': is_inplace(streaming_pattern),
         'zero_centered': cqc.zero_centered_pdfs,
-        'weights': ",".join(str(w.evalf()) + constant_suffix for w in method.weights),
-        'inverse_weights': ",".join(str((1 / w).evalf()) + constant_suffix for w in method.weights),
+
+        'weights': ", ".join(f"{data_type_string}({str(w.evalf())})" for w in method.weights),
+        'inverse_weights': ", ".join(f"{data_type_string}({str((1 / w).evalf())})" for w in method.weights),
+        'even_read': _get_access_list(even_read, stencil.D),
+        'even_write': _get_access_list(even_write, stencil.D),
+        'odd_read': _get_access_list(odd_read, stencil.D),
+        'odd_write': _get_access_list(odd_write, stencil.D),
 
         'nonuniform': nonuniform,
         'target': target.name.lower(),
-        'dtype': "float" if is_float else "double",
+        'dtype': data_type_string,
         'is_gpu': target == Target.GPU,
         'likwid': generation_context.likwid,
         'kernels': kernels,
@@ -93,3 +134,16 @@ def generate_lbm_storage_specification(generation_context: CodeGenerationContext
     source_extension = "cu" if target == Target.GPU and generation_context.cuda else "cpp"
     generation_context.write_file(f"{class_name}.h", header)
     generation_context.write_file(f"{class_name}.{source_extension}", source)
+
+
+def _get_access_list(access_list, dim):
+    # One comma-separated row of integer offsets per spatial axis; a final row of index[0] values is added below.
+    rows = [", ".join(str(int(acc.offsets[axis])) for acc in access_list) for axis in range(dim)]
+
+    if dim == 2:
+        # Only two offset rows exist in 2D; append an all-zero third row before the index row.
+        rows.append(", ".join("0" for _ in access_list))
+
+    rows.append(", ".join(str(int(acc.index[0])) for acc in access_list))
+
+    return rows
diff --git a/python/lbmpy_walberla/sweep_collection.py b/python/lbmpy_walberla/sweep_collection.py
index 8edd0779b328de768cba4a3acb5f04bdb6bb3acf..5fe4892ab3bc9740f3ca04775363586b42134e96 100644
--- a/python/lbmpy_walberla/sweep_collection.py
+++ b/python/lbmpy_walberla/sweep_collection.py
@@ -7,9 +7,10 @@ import numpy as np
 from pystencils import Target, create_kernel
 from pystencils.config import CreateKernelConfig
 from pystencils.field import Field
+from pystencils.simp import add_subexpressions_for_field_reads
 
 from lbmpy.advanced_streaming import is_inplace, get_accessor, Timestep
-from lbmpy.creationfunctions import LbmCollisionRule
+from lbmpy.creationfunctions import LbmCollisionRule, LBMConfig, LBMOptimisation
 from lbmpy.fieldaccess import CollideOnlyInplaceAccessor
 from lbmpy.macroscopic_value_kernels import macroscopic_values_setter, macroscopic_values_getter
 from lbmpy.updatekernels import create_lbm_kernel, create_stream_only_kernel
@@ -17,41 +18,45 @@ from lbmpy.updatekernels import create_lbm_kernel, create_stream_only_kernel
 from pystencils_walberla.kernel_selection import KernelCallNode, KernelFamily
 from pystencils_walberla.utility import config_from_context
 from pystencils_walberla import generate_sweep_collection
+from lbmpy_walberla.utility import create_pdf_field
 
 from .alternating_sweeps import EvenIntegerCondition
 from .function_generator import kernel_family_function_generator
 
 
 def generate_lbm_sweep_collection(ctx, class_name: str, collision_rule: LbmCollisionRule,
-                                  streaming_pattern='pull',
-                                  field_layout='fzyx', refinement_scaling=None,
-                                  macroscopic_fields: Dict[str, Field] = None,
+                                  lbm_config: LBMConfig, lbm_optimisation: LBMOptimisation,
+                                  refinement_scaling=None, macroscopic_fields: Dict[str, Field] = None,
                                   target=Target.CPU, data_type=None, cpu_openmp=None, cpu_vectorize_info=None,
                                   max_threads=None,
                                   **create_kernel_params):
+
     config = config_from_context(ctx, target=target, data_type=data_type,
                                  cpu_openmp=cpu_openmp, cpu_vectorize_info=cpu_vectorize_info, **create_kernel_params)
 
+    streaming_pattern = lbm_config.streaming_pattern
+    field_layout = lbm_optimisation.field_layout
+
     # usually a numpy layout is chosen by default i.e. xyzf - which is bad for waLBerla where at least the spatial
     # coordinates should be ordered in reverse direction i.e. zyx
     lb_method = collision_rule.method
 
-    q = lb_method.stencil.Q
-    dim = lb_method.stencil.D
-
     if field_layout == 'fzyx':
         config.cpu_vectorize_info['assume_inner_stride_one'] = True
     elif field_layout == 'zyxf':
         config.cpu_vectorize_info['assume_inner_stride_one'] = False
 
-    src_field = Field.create_generic('pdfs', dim, config.data_type['pdfs'].numpy_dtype,
-                                     index_dimensions=1, layout=field_layout, index_shape=(q,))
+    src_field = lbm_optimisation.symbolic_field
+    if not src_field:
+        src_field = create_pdf_field(config=config, name="pdfs", stencil=lbm_config.stencil,
+                                     field_layout=lbm_optimisation.field_layout)
     if is_inplace(streaming_pattern):
         dst_field = src_field
     else:
-        dst_field = Field.create_generic('pdfs_tmp', dim, config.data_type['pdfs_tmp'].numpy_dtype,
-                                         index_dimensions=1, layout=field_layout,
-                                         index_shape=(q,))
+        dst_field = lbm_optimisation.symbolic_temporary_field
+        if not dst_field:
+            dst_field = create_pdf_field(config=config, name="pdfs_tmp", stencil=lbm_config.stencil,
+                                         field_layout=lbm_optimisation.field_layout)
 
     config = replace(config, ghost_layers=0)
 
@@ -68,12 +73,16 @@ def generate_lbm_sweep_collection(ctx, class_name: str, collision_rule: LbmColli
     function_generators.append(generator('stream', family("stream")))
     function_generators.append(generator('streamOnlyNoAdvancement', family("streamOnlyNoAdvancement")))
 
-    setter_family = get_setter_family(class_name, lb_method, src_field, streaming_pattern, macroscopic_fields, config)
+    config_unoptimized = replace(config, cpu_vectorize_info=None, cpu_prepend_optimizations=[], cpu_blocking=None)
+
+    setter_family = get_setter_family(class_name, lb_method, src_field, streaming_pattern, macroscopic_fields,
+                                      config_unoptimized)
     setter_generator = kernel_family_function_generator('initialise', setter_family,
                                                         namespace='lbm', max_threads=max_threads)
     function_generators.append(setter_generator)
 
-    getter_family = get_getter_family(class_name, lb_method, src_field, streaming_pattern, macroscopic_fields, config)
+    getter_family = get_getter_family(class_name, lb_method, src_field, streaming_pattern, macroscopic_fields,
+                                      config_unoptimized)
     getter_generator = kernel_family_function_generator('calculateMacroscopicParameters', getter_family,
                                                         namespace='lbm', max_threads=max_threads)
     function_generators.append(getter_generator)
@@ -98,15 +107,17 @@ class RefinementScaling:
 def lbm_kernel_family(class_name, kernel_name,
                       collision_rule, streaming_pattern, src_field, dst_field, config: CreateKernelConfig):
 
+    default_dtype = config.data_type.default_factory()
     if kernel_name == "streamCollide":
         def lbm_kernel(field_accessor, lb_stencil):
-            return create_lbm_kernel(collision_rule, src_field, dst_field, field_accessor)
+            return create_lbm_kernel(collision_rule, src_field, dst_field, field_accessor, data_type=default_dtype)
         advance_timestep = {"field_name": src_field.name, "function": "advanceTimestep"}
         temporary_fields = ['pdfs_tmp']
         field_swaps = [('pdfs', 'pdfs_tmp')]
     elif kernel_name == "collide":
         def lbm_kernel(field_accessor, lb_stencil):
-            return create_lbm_kernel(collision_rule, src_field, dst_field, CollideOnlyInplaceAccessor())
+            return create_lbm_kernel(collision_rule, src_field, dst_field, CollideOnlyInplaceAccessor(),
+                                     data_type=default_dtype)
         advance_timestep = {"field_name": src_field.name, "function": "advanceTimestep"}
         temporary_fields = ()
         field_swaps = ()
@@ -161,6 +172,8 @@ def get_setter_family(class_name, lb_method, pdfs, streaming_pattern, macroscopi
     density = macroscopic_fields.get('density', 1.0)
     velocity = macroscopic_fields.get('velocity', [0.0] * dim)
 
+    default_dtype = config.data_type.default_factory()
+
     get_timestep = {"field_name": pdfs.name, "function": "getTimestep"}
     temporary_fields = ()
     field_swaps = ()
@@ -173,6 +186,9 @@ def get_setter_family(class_name, lb_method, pdfs, streaming_pattern, macroscopi
                                                density=density, velocity=velocity, pdfs=pdfs,
                                                streaming_pattern=streaming_pattern, previous_timestep=timestep)
 
+            if default_dtype != pdfs.dtype:
+                setter = add_subexpressions_for_field_reads(setter, data_type=default_dtype)
+
             setter_ast = create_kernel(setter, config=config)
             setter_ast.function_name = 'kernel_initialise' + timestep_suffix
             nodes.append(KernelCallNode(setter_ast))
@@ -199,6 +215,8 @@ def get_getter_family(class_name, lb_method, pdfs, streaming_pattern, macroscopi
     if density is None and velocity is None:
         return None
 
+    default_dtype = config.data_type.default_factory()
+
     get_timestep = {"field_name": pdfs.name, "function": "getTimestep"}
     temporary_fields = ()
     field_swaps = ()
@@ -211,6 +229,9 @@ def get_getter_family(class_name, lb_method, pdfs, streaming_pattern, macroscopi
                                                density=density, velocity=velocity, pdfs=pdfs,
                                                streaming_pattern=streaming_pattern, previous_timestep=timestep)
 
+            if default_dtype != pdfs.dtype:
+                getter = add_subexpressions_for_field_reads(getter, data_type=default_dtype)
+
             getter_ast = create_kernel(getter, config=config)
             getter_ast.function_name = 'kernel_getter' + timestep_suffix
             nodes.append(KernelCallNode(getter_ast))
diff --git a/python/lbmpy_walberla/templates/BoundaryCollection.tmpl.h b/python/lbmpy_walberla/templates/BoundaryCollection.tmpl.h
index 5f49137846ba99d60888e7353ac4ff195ade2a84..47f313860dacc6cf69c783047c3ed53e82bcb91b 100644
--- a/python/lbmpy_walberla/templates/BoundaryCollection.tmpl.h
+++ b/python/lbmpy_walberla/templates/BoundaryCollection.tmpl.h
@@ -41,12 +41,12 @@ class {{class_name}}
    enum Type { ALL = 0, INNER = 1, OUTER = 2 };
 
 
-   {{class_name}}( {{- ["const shared_ptr<StructuredBlockForest> & blocks", "BlockDataID flagID_", "BlockDataID pdfsID_", "FlagUID domainUID_", [kernel_list|generate_constructor_parameters(['indexVector', 'indexVectorSize', 'pdfs'])]] | type_identifier_list -}} )
+   {{class_name}}( {{- ["const shared_ptr<StructuredBlockForest> & blocks", "BlockDataID flagID_", "BlockDataID pdfsID_", "FlagUID domainUID_", [kernel_list|generate_constructor_parameters(['indexVector', 'indexVectorSize', 'pdfs'])], additional_constructor_arguments] | type_identifier_list -}} )
       : blocks_(blocks), flagID(flagID_), pdfsID(pdfsID_), domainUID(domainUID_)
    {
-      {% for object_name, boundary_class, kernel in zip(object_names, boundary_classes, kernel_list) -%}
+      {% for object_name, boundary_class, kernel, additional_data_handler in zip(object_names, boundary_classes, kernel_list, additional_data_handlers) -%}
 
-      {{object_name}} = std::make_shared< {{boundary_class}} >({{- ["blocks", "pdfsID", [kernel|generate_function_collection_call(['indexVector', 'indexVectorSize', 'pdfs', 'timestep', 'gpuStream'])]] | type_identifier_list -}});
+      {{object_name}} = std::make_shared< {{boundary_class}} >({{- ["blocks", "pdfsID", [kernel|generate_function_collection_call(['indexVector', 'indexVectorSize', 'pdfs', 'timestep', 'gpuStream'])], additional_data_handler.constructor_argument_name] | type_identifier_list -}});
       {% endfor %}
 
       {% for object_name, flag_uid in zip(object_names, flag_uids) -%}
@@ -105,4 +105,3 @@ class {{class_name}}
 
 }
 }
-
diff --git a/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.cpp b/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.cpp
index 91c7d7d960a78552628d3d8568dd611f13c14a2d..92106cddf0248c93707e6881b600b6a702d27985 100644
--- a/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.cpp
+++ b/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.cpp
@@ -27,31 +27,31 @@
 #   pragma GCC diagnostic ignored "-Wunused-variable"
 #endif
 
-/*************************************************************************************
+namespace walberla {
+namespace {{namespace}} {
+
+   /*************************************************************************************
  *                                Kernel Definitions
 *************************************************************************************/
-{{ kernels['packAll']      | generate_definitions }}
-{{ kernels['unpackAll']    | generate_definitions }}
-{{ kernels['localCopyAll'] | generate_definitions }}
-
-{{ kernels['packDirection']      | generate_definitions }}
-{{ kernels['unpackDirection']    | generate_definitions }}
-{{ kernels['localCopyDirection'] | generate_definitions }}
-
-{% if nonuniform -%}
-{{ kernels['unpackRedistribute']    | generate_definitions }}
-{{ kernels['packPartialCoalescence']    | generate_definitions }}
-{{ kernels['zeroCoalescenceRegion']    | generate_definitions }}
-{{ kernels['unpackCoalescence']    | generate_definitions }}
-{%- endif %}
-
-/*************************************************************************************
+   {{ kernels['packAll']      | generate_definitions }}
+   {{ kernels['unpackAll']    | generate_definitions }}
+   {{ kernels['localCopyAll'] | generate_definitions }}
+
+   {{ kernels['packDirection']      | generate_definitions }}
+   {{ kernels['unpackDirection']    | generate_definitions }}
+   {{ kernels['localCopyDirection'] | generate_definitions }}
+
+   {% if nonuniform -%}
+   {{ kernels['unpackRedistribute']    | generate_definitions }}
+   {{ kernels['packPartialCoalescence']    | generate_definitions }}
+   {{ kernels['zeroCoalescenceRegion']    | generate_definitions }}
+   {{ kernels['unpackCoalescence']    | generate_definitions }}
+   {%- endif %}
+
+   /*************************************************************************************
  *                                 Kernel Wrappers
 *************************************************************************************/
 
-namespace walberla {
-namespace {{namespace}} {
-
    void {{class_name}}::PackKernels::packAll(
       {{- [ "PdfField_T * " + src_field.name, "CellInterval & ci",
              "unsigned char * outBuffer", kernels['packAll'].kernel_selection_parameters,
@@ -128,8 +128,8 @@ namespace {{namespace}} {
       WALBERLA_ASSERT_EQUAL(srcInterval.zSize(), dstInterval.zSize())
 
       {{kernels['localCopyDirection']
-          | generate_call(cell_interval={src_field : 'srcInterval', dst_field : 'dstInterval'}, stream='stream')
-          | indent(6) }}
+               | generate_call(cell_interval={src_field : 'srcInterval', dst_field : 'dstInterval'}, stream='stream')
+               | indent(6) }}
    }
 
    {% if nonuniform -%}
diff --git a/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.h b/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.h
index d6039b077c4ab95e226f5c68ecc34a0a138b6932..c8f7670aded37ad2472a0385591d24f75f5ee374 100644
--- a/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.h
+++ b/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.h
@@ -82,6 +82,32 @@ class {{class_name}}
    // Inverse lattice weights
    static constexpr {{dtype}} wInv[{{stencil_size}}] = { {{inverse_weights}} };
 
+   struct AccessorEVEN
+   {
+      static constexpr cell_idx_t readX[{{stencil_size}}] = { {{even_read[0]}} };
+      static constexpr cell_idx_t readY[{{stencil_size}}] = { {{even_read[1]}} };
+      static constexpr cell_idx_t readZ[{{stencil_size}}] = { {{even_read[2]}} };
+      static constexpr cell_idx_t readD[{{stencil_size}}] = { {{even_read[3]}} };
+
+      static constexpr cell_idx_t writeX[{{stencil_size}}] = { {{even_write[0]}} };
+      static constexpr cell_idx_t writeY[{{stencil_size}}] = { {{even_write[1]}} };
+      static constexpr cell_idx_t writeZ[{{stencil_size}}] = { {{even_write[2]}} };
+      static constexpr cell_idx_t writeD[{{stencil_size}}] = { {{even_write[3]}} };
+   };
+
+   struct AccessorODD
+   {
+      static constexpr cell_idx_t readX[{{stencil_size}}] = { {{odd_read[0]}} };
+      static constexpr cell_idx_t readY[{{stencil_size}}] = { {{odd_read[1]}} };
+      static constexpr cell_idx_t readZ[{{stencil_size}}] = { {{odd_read[2]}} };
+      static constexpr cell_idx_t readD[{{stencil_size}}] = { {{odd_read[3]}} };
+
+      static constexpr cell_idx_t writeX[{{stencil_size}}] = { {{odd_write[0]}} };
+      static constexpr cell_idx_t writeY[{{stencil_size}}] = { {{odd_write[1]}} };
+      static constexpr cell_idx_t writeZ[{{stencil_size}}] = { {{odd_write[2]}} };
+      static constexpr cell_idx_t writeD[{{stencil_size}}] = { {{odd_write[3]}} };
+   };
+
    // Compute kernels to pack and unpack MPI buffers
    class PackKernels {
 
@@ -100,8 +126,8 @@ class {{class_name}}
       static const bool inplace = {% if inplace -%} true {%- else -%} false {%- endif -%};
 
       /**
-       * Packs all pdfs from the given cell interval to the send buffer.
-       * */
+      * Packs all pdfs from the given cell interval to the send buffer.
+      * */
       void packAll(
          {{- [ "PdfField_T * " + src_field.name, "CellInterval & ci",
                 "unsigned char * outBuffer", kernels['packAll'].kernel_selection_parameters,
@@ -172,7 +198,7 @@ class {{class_name}}
        * @return    The required size of the buffer, in bytes
        * */
       uint_t size (CellInterval & ci, stencil::Direction dir) const {
-         return ci.numCells() * sizes[dir] * sizeof(value_type);
+         return ci.numCells() * sizes[dir] * uint_c(sizeof(value_type));
       }
 
       /**
@@ -182,7 +208,7 @@ class {{class_name}}
        * @return    The required size of the buffer, in bytes
        * */
       uint_t size (CellInterval & ci) const {
-         return ci.numCells() * {{stencil_size}} * sizeof(value_type);
+         return ci.numCells() * {{stencil_size}} * uint_c(sizeof(value_type));
       }
 
       {% if nonuniform -%}
@@ -255,6 +281,8 @@ class {{class_name}}
       const uint_t sizes[{{direction_sizes|length}}] { {{ direction_sizes | join(', ') }} };
    };
 
+   using value_type = PackKernels::value_type;
+
 };
 
 }} //{{namespace}}/walberla
\ No newline at end of file
diff --git a/python/lbmpy_walberla/utility.py b/python/lbmpy_walberla/utility.py
index 1289c381e7b50ac7e83d34fca887e6d659959b92..75460d5f9458104e02f21ba52d54acbb261628b7 100644
--- a/python/lbmpy_walberla/utility.py
+++ b/python/lbmpy_walberla/utility.py
@@ -1,4 +1,8 @@
+import numpy as np
+from pystencils import CreateKernelConfig, fields
+
 from lbmpy.advanced_streaming import Timestep
+from lbmpy.stencils import LBStencil
 
 
 def timestep_suffix(timestep: Timestep):
@@ -9,3 +13,9 @@ def timestep_suffix(timestep: Timestep):
     """
     return ("_" + str(timestep)) if timestep != Timestep.BOTH else ''
 
+
+def create_pdf_field(config: CreateKernelConfig, name: str, stencil: LBStencil, field_layout: str = 'fzyx'):
+    # Field with stencil.Q index entries in stencil.D spatial dimensions, typed by the config's default data type.
+    data_type = config.data_type.default_factory().numpy_dtype
+    return fields(f'{name}({stencil.Q}) :{data_type}[{stencil.D}D]', layout=field_layout)
+
diff --git a/python/lbmpy_walberla/walberla_lbm_package.py b/python/lbmpy_walberla/walberla_lbm_package.py
index e21d6c9613a1c2be87e21cbc06a2a78212f72552..80b37a4a90f717e79ea60890a802a363c45dde52 100644
--- a/python/lbmpy_walberla/walberla_lbm_package.py
+++ b/python/lbmpy_walberla/walberla_lbm_package.py
@@ -17,7 +17,9 @@ def generate_lbm_package(ctx: CodeGenerationContext, name: str,
                          lbm_config: LBMConfig, lbm_optimisation: LBMOptimisation,
                          nonuniform: bool = False, boundaries: List[Callable] = None,
                          macroscopic_fields: Dict[str, Field] = None,
-                         target: Target = Target.CPU, data_type=None, cpu_openmp=None, cpu_vectorize_info=None,
+                         target: Target = Target.CPU,
+                         data_type=None, pdfs_data_type=None,
+                         cpu_openmp=None, cpu_vectorize_info=None,
                          max_threads=None,
                          **kernel_parameters):
 
@@ -27,8 +29,10 @@ def generate_lbm_package(ctx: CodeGenerationContext, name: str,
     method = collision_rule.method
 
     storage_spec_name = f'{name}StorageSpecification'
-    generate_lbm_storage_specification(ctx, storage_spec_name, method, lbm_config,
-                                       nonuniform=nonuniform, target=target, data_type=data_type)
+    generate_lbm_storage_specification(ctx, storage_spec_name, method, lbm_config, lbm_optimisation,
+                                       nonuniform=nonuniform, target=target,
+                                       data_type=pdfs_data_type,
+                                       cpu_openmp=cpu_openmp)
 
     if nonuniform:
         omega = get_shear_relaxation_rate(method)
@@ -37,10 +41,8 @@ def generate_lbm_package(ctx: CodeGenerationContext, name: str,
     else:
         refinement_scaling = None
 
-    streaming_pattern = lbm_config.streaming_pattern
     generate_lbm_sweep_collection(ctx, f'{name}SweepCollection', collision_rule,
-                                  streaming_pattern=streaming_pattern,
-                                  field_layout=lbm_optimisation.field_layout,
+                                  lbm_config=lbm_config, lbm_optimisation=lbm_optimisation,
                                   refinement_scaling=refinement_scaling,
                                   macroscopic_fields=macroscopic_fields,
                                   target=target, data_type=data_type,
@@ -48,6 +50,11 @@ def generate_lbm_package(ctx: CodeGenerationContext, name: str,
                                   max_threads=max_threads,
                                   **kernel_parameters)
 
+    spatial_shape = None
+    if lbm_optimisation.symbolic_field and lbm_optimisation.symbolic_field.has_fixed_shape:
+        spatial_shape = lbm_optimisation.symbolic_field.spatial_shape + (lbm_config.stencil.Q, )
+
     generate_boundary_collection(ctx, f'{name}BoundaryCollection', boundary_generators=boundaries,
-                                 lb_method=method, streaming_pattern=streaming_pattern,
+                                 lb_method=method, field_name='pdfs', spatial_shape=spatial_shape,
+                                 streaming_pattern=lbm_config.streaming_pattern,
                                  target=target, layout=lbm_optimisation.field_layout)
diff --git a/python/pystencils_walberla/additional_data_handler.py b/python/pystencils_walberla/additional_data_handler.py
index 2c4efcc65bdd60451e32e7cefa88f9baf8d6c03e..48f87dd5307a2da6c4ceb3bfcd59974ef9896691 100644
--- a/python/pystencils_walberla/additional_data_handler.py
+++ b/python/pystencils_walberla/additional_data_handler.py
@@ -16,6 +16,10 @@ class AdditionalDataHandler:
         else:
             self._walberla_stencil = stencil
 
+    @property
+    def constructor_argument_name(self):
+        return ""
+
     @property
     def constructor_arguments(self):
         return ""
diff --git a/python/pystencils_walberla/boundary.py b/python/pystencils_walberla/boundary.py
index c5a5e54c1d00d9d6e476306453eae4320b6f5aa8..7af79ed677697633c099a5dec78b2b9afb66a226 100644
--- a/python/pystencils_walberla/boundary.py
+++ b/python/pystencils_walberla/boundary.py
@@ -19,7 +19,9 @@ def generate_boundary(generation_context,
                       field_name,
                       neighbor_stencil,
                       index_shape,
+                      spatial_shape=None,
                       field_type=FieldType.GENERIC,
+                      field_data_type=None,
                       kernel_creation_function=None,
                       target=Target.CPU,
                       data_type=None,
@@ -47,14 +49,17 @@ def generate_boundary(generation_context,
     del create_kernel_params['default_number_int']
     del create_kernel_params['skip_independence_check']
 
-    field_data_type = config.data_type[field_name].numpy_dtype
+    if field_data_type is None:
+        field_data_type = config.data_type[field_name].numpy_dtype
 
     index_struct_dtype = numpy_data_type_for_boundary_object(boundary_object, dim)
 
-    field = Field.create_generic(field_name, dim,
-                                 field_data_type,
-                                 index_dimensions=len(index_shape), layout=layout, index_shape=index_shape,
-                                 field_type=field_type)
+    if spatial_shape:
+        field = Field.create_fixed_size(field_name, spatial_shape, index_dimensions=len(index_shape),
+                                        dtype=field_data_type, layout=layout, field_type=field_type)
+    else:
+        field = Field.create_generic(field_name, dim, dtype=field_data_type, index_dimensions=len(index_shape),
+                                     layout=layout, index_shape=index_shape, field_type=field_type)
 
     index_field = Field('indexVector', FieldType.INDEXED, index_struct_dtype, layout=[0],
                         shape=(TypedSymbol("indexVectorSize", create_type("int32")), 1), strides=(1, 1))
@@ -126,5 +131,4 @@ def generate_staggered_flux_boundary(generation_context, class_name, boundary_ob
     assert dim == len(neighbor_stencil[0])
+    # Pass field_type by keyword: generate_boundary now takes spatial_shape ahead of it.
     generate_boundary(generation_context, class_name, boundary_object, 'flux', neighbor_stencil, index_shape,
-                      FieldType.STAGGERED_FLUX, target=target, **kwargs)
-
-
+                      field_type=FieldType.STAGGERED_FLUX, target=target, **kwargs)
diff --git a/python/pystencils_walberla/templates/Boundary.tmpl.h b/python/pystencils_walberla/templates/Boundary.tmpl.h
index 96a9202c19345f0e36c5e048be1ee65969f5c966..75e3cd13abdacdcd06ea7ba784552b66daa21b63 100644
--- a/python/pystencils_walberla/templates/Boundary.tmpl.h
+++ b/python/pystencils_walberla/templates/Boundary.tmpl.h
@@ -48,6 +48,10 @@
 #define RESTRICT
 #endif
 
+#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
+using walberla::half;
+#endif
+
 namespace walberla {
 namespace {{namespace}} {
 
diff --git a/python/pystencils_walberla/templates/SweepCollection.tmpl.h b/python/pystencils_walberla/templates/SweepCollection.tmpl.h
index 5db4ccb33457efcc2f9f9385d0f2b32db35aef5e..2a89fef5e4c6c825443db51ed1acc3a6d02ffaa8 100644
--- a/python/pystencils_walberla/templates/SweepCollection.tmpl.h
+++ b/python/pystencils_walberla/templates/SweepCollection.tmpl.h
@@ -64,20 +64,20 @@ namespace {{namespace}} {
 
 class {{class_name}}
 {
-public:
-  enum Type { ALL = 0, INNER = 1, OUTER = 2 };
+ public:
+   enum Type { ALL = 0, INNER = 1, OUTER = 2 };
 
    {{class_name}}(const shared_ptr< StructuredBlockStorage > & blocks, {{kernel_list|generate_constructor_parameters}}, const Cell & outerWidth=Cell(1, 1, 1))
-     : blocks_(blocks), {{ kernel_list|generate_constructor_initializer_list(parameter_registration=parameter_scaling) }}, outerWidth_(outerWidth)
+      : blocks_(blocks), {{ kernel_list|generate_constructor_initializer_list(parameter_registration=parameter_scaling) }}, outerWidth_(outerWidth)
    {
       {{kernel_list|generate_constructor(parameter_registration=parameter_scaling) |indent(6)}}
 
+      validInnerOuterSplit_= true;
+
       for (auto& iBlock : *blocks)
       {
-         if (int_c(blocks->getNumberOfXCells(iBlock)) <= outerWidth_[0] * 2 ||
-             int_c(blocks->getNumberOfYCells(iBlock)) <= outerWidth_[1] * 2 ||
-             int_c(blocks->getNumberOfZCells(iBlock)) <= outerWidth_[2] * 2)
-          WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller or increase cellsPerBlock")
+         if (int_c(blocks->getNumberOfXCells(iBlock)) <= outerWidth_[0] * 2 || int_c(blocks->getNumberOfYCells(iBlock)) <= outerWidth_[1] * 2 || int_c(blocks->getNumberOfZCells(iBlock)) <= outerWidth_[2] * 2)
+            validInnerOuterSplit_ = false;
       }
    };
 
@@ -105,54 +105,66 @@ public:
 
    std::function<void (IBlock *)> {{kernel['function_name']}}({{- ["Type type", ] | type_identifier_list -}})
    {
+      if (!validInnerOuterSplit_ && type != Type::ALL)
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
       switch (type)
       {
-         case Type::INNER:
-            return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Inner({{- ["block", ] | type_identifier_list -}}); };
-         case Type::OUTER:
-            return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Outer({{- ["block", ] | type_identifier_list -}}); };
-         default:
-            return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", ] | type_identifier_list -}}); };
+      case Type::INNER:
+         return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Inner({{- ["block", ] | type_identifier_list -}}); };
+      case Type::OUTER:
+         return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Outer({{- ["block", ] | type_identifier_list -}}); };
+      default:
+         return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", ] | type_identifier_list -}}); };
       }
    }
 
    std::function<void (IBlock *)> {{kernel['function_name']}}({{- ["Type type", "const cell_idx_t ghost_layers"] | type_identifier_list -}})
    {
+      if (!validInnerOuterSplit_ && type != Type::ALL)
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
       switch (type)
       {
-         case Type::INNER:
-            return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Inner({{- ["block", ] | type_identifier_list -}}); };
-         case Type::OUTER:
-            return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Outer({{- ["block", ] | type_identifier_list -}}); };
-         default:
-            return [{{- ["this", "ghost_layers"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", "ghost_layers"] | type_identifier_list -}}); };
+      case Type::INNER:
+         return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Inner({{- ["block", ] | type_identifier_list -}}); };
+      case Type::OUTER:
+         return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Outer({{- ["block", ] | type_identifier_list -}}); };
+      default:
+         return [{{- ["this", "ghost_layers"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", "ghost_layers"] | type_identifier_list -}}); };
       }
    }
 
    {% if target is equalto 'gpu' -%}
    std::function<void (IBlock *)> {{kernel['function_name']}}({{- ["Type type", "const cell_idx_t ghost_layers", "gpuStream_t gpuStream"] | type_identifier_list -}})
    {
+      if (!validInnerOuterSplit_ && type != Type::ALL)
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
       switch (type)
       {
-         case Type::INNER:
-            return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Inner({{- ["block", "gpuStream"] | type_identifier_list -}}); };
-         case Type::OUTER:
-            return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Outer({{- ["block", "gpuStream"] | type_identifier_list -}}); };
-         default:
-            return [{{- ["this", "ghost_layers", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", "ghost_layers", "gpuStream"] | type_identifier_list -}}); };
+      case Type::INNER:
+         return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Inner({{- ["block", "gpuStream"] | type_identifier_list -}}); };
+      case Type::OUTER:
+         return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Outer({{- ["block", "gpuStream"] | type_identifier_list -}}); };
+      default:
+         return [{{- ["this", "ghost_layers", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", "ghost_layers", "gpuStream"] | type_identifier_list -}}); };
       }
    }
 
    std::function<void (IBlock *)> {{kernel['function_name']}}({{- ["Type type", "gpuStream_t gpuStream"] | type_identifier_list -}})
    {
+      if (!validInnerOuterSplit_ && type != Type::ALL)
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
       switch (type)
       {
-         case Type::INNER:
-            return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Inner({{- ["block", "gpuStream"] | type_identifier_list -}}); };
-         case Type::OUTER:
-            return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Outer({{- ["block", "gpuStream"] | type_identifier_list -}}); };
-         default:
-            return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", "cell_idx_c(0)", "gpuStream"] | type_identifier_list -}}); };
+      case Type::INNER:
+         return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Inner({{- ["block", "gpuStream"] | type_identifier_list -}}); };
+      case Type::OUTER:
+         return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Outer({{- ["block", "gpuStream"] | type_identifier_list -}}); };
+      default:
+         return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", "cell_idx_c(0)", "gpuStream"] | type_identifier_list -}}); };
       }
    }
    {%- endif %}
@@ -247,24 +259,24 @@ public:
          layers_.push_back(ci);
       }
 
-    {%if target is equalto 'gpu'%}
+      {%if target is equalto 'gpu'%}
       {
          auto parallelSection_ = parallelStreams_.parallelSection( gpuStream );
          for( auto & ci: layers_ )
          {
-          parallelSection_.run([&]( auto s ) {
-             {{kernel['function_name']}}CellInterval({{kernel['kernel']|generate_function_collection_call(cell_interval='ci')}});
-          });
+            parallelSection_.run([&]( auto s ) {
+               {{kernel['function_name']}}CellInterval({{kernel['kernel']|generate_function_collection_call(cell_interval='ci')}});
+            });
          }
       }
-    {% else %}
+      {% else %}
       for( auto & ci: layers_ )
       {
          {{kernel['function_name']}}CellInterval({{kernel['kernel']|generate_function_collection_call(cell_interval='ci')}});
       }
-    {% endif %}
+      {% endif %}
 
-    {{kernel['kernel']|generate_swaps|indent(9)}}
+      {{kernel['kernel']|generate_swaps|indent(9)}}
    }
    {% endfor %}
 
@@ -275,17 +287,18 @@ public:
    }
    {%endif%}
 
-   private:
-      shared_ptr< StructuredBlockStorage > blocks_;
-      {{kernel_list|generate_members(parameter_registration=parameter_scaling)|indent(4)}}
+ private:
+   shared_ptr< StructuredBlockStorage > blocks_;
+   {{kernel_list|generate_members(parameter_registration=parameter_scaling)|indent(4)}}
 
-      Cell outerWidth_;
-      std::vector<CellInterval> layers_;
+   Cell outerWidth_;
+   std::vector<CellInterval> layers_;
+   bool validInnerOuterSplit_;
 
-      {%if target is equalto 'gpu' -%}
-      gpu::ParallelStreams parallelStreams_;
-      // std::map<BlockID, gpuStream_t > streams_;
-      {%- endif %}
+   {%if target is equalto 'gpu' -%}
+   gpu::ParallelStreams parallelStreams_;
+   // std::map<BlockID, gpuStream_t > streams_;
+   {%- endif %}
 };
 
 
diff --git a/src/core/Variant.h b/src/core/Variant.h
index 2be3f0a6c09e562dd48ef54b460dc08d786683d6..94c281593e2592e9dee4c26c14fd4ba947a70e87 100644
--- a/src/core/Variant.h
+++ b/src/core/Variant.h
@@ -21,9 +21,18 @@
 
 #pragma once
 
+// Suppress icpc diagnostic #68 (a conversion warning) raised while including <variant>; it would fail the build under `-Werror`.
+#include "waLBerlaDefinitions.h"
+#if ( defined WALBERLA_CXX_COMPILER_IS_INTEL )
+#pragma warning(push)
+#pragma warning disable 68
+#endif
 
 #include <variant>
 
+#if ( defined WALBERLA_CXX_COMPILER_IS_INTEL )
+#pragma warning(pop)
+#endif
 
 
 namespace walberla
diff --git a/tests/lbm_generated/Example.py b/tests/lbm_generated/Example.py
index 5233639be24c6574cee6440300bfe73e22e5e2ae..c4e3eaa96d596b2745811e30739790748740f884 100644
--- a/tests/lbm_generated/Example.py
+++ b/tests/lbm_generated/Example.py
@@ -23,7 +23,8 @@ with CodeGeneration() as ctx:
     omega = sp.symbols("omega")
 
     stencil = LBStencil(Stencil.D3Q19)
-    pdfs, vel_field = fields(f"pdfs({stencil.Q}), velocity({stencil.D}): {data_type}[{stencil.D}D]", layout='fzyx')
+    pdfs, vel_field = fields(f"pdfs({stencil.Q}), velocity({stencil.D}): {data_type}[{stencil.D}D]",
+                             layout='fzyx')
 
     macroscopic_fields = {'velocity': vel_field}