diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 187dd2c3185c625c066c0bf283e4875893547246..c46b9090f88ea1030e4ac1ce3eca768f32e3783e 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -19,6 +19,7 @@ stages:
 
 .build_template:
    script:
+      - source /entrypoint.sh
       - pip install -I cmake==3.16.3 jinja2
       - export NUM_CORES=$(nproc --all)
       - export MAX_BUILD_CORES=$(( $(awk '( $1 == "MemTotal:" ) { print $2 }' /proc/meminfo) / ( 4 * 1024 * 1024  ) ))
@@ -276,7 +277,7 @@ gcc_10_serial:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -305,7 +306,7 @@ gcc_10_mpionly:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -332,7 +333,7 @@ gcc_10_hybrid:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -358,7 +359,7 @@ gcc_10_serial_dbg:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -388,7 +389,7 @@ gcc_10_mpionly_dbg:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -416,7 +417,7 @@ gcc_10_hybrid_dbg:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -440,7 +441,7 @@ gcc_10_hybrid_dbg_sp:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -470,7 +471,7 @@ gcc_11_serial:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -499,7 +500,7 @@ gcc_11_mpionly:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -526,7 +527,7 @@ gcc_11_hybrid:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -552,7 +553,7 @@ gcc_11_serial_dbg:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -582,7 +583,7 @@ gcc_11_mpionly_dbg:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -610,7 +611,7 @@ gcc_11_hybrid_dbg:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -637,7 +638,7 @@ gcc_11_hybrid_dbg_sp:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -667,7 +668,7 @@ gcc_12_serial:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -696,7 +697,7 @@ gcc_12_mpionly:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -723,7 +724,7 @@ gcc_12_hybrid:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -749,7 +750,7 @@ gcc_12_serial_dbg:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -779,7 +780,7 @@ gcc_12_mpionly_dbg:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -807,7 +808,7 @@ gcc_12_hybrid_dbg:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -834,7 +835,7 @@ gcc_12_hybrid_dbg_sp:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -864,7 +865,7 @@ gcc_13_serial:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -894,7 +895,7 @@ gcc_13_mpionly:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -923,7 +924,7 @@ gcc_13_hybrid:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -947,7 +948,7 @@ gcc_13_serial_dbg:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -975,7 +976,7 @@ gcc_13_mpionly_dbg:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1001,7 +1002,7 @@ gcc_13_hybrid_dbg:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1026,7 +1027,7 @@ gcc_13_hybrid_dbg_sp:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1054,7 +1055,7 @@ clang_14_serial:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1083,7 +1084,7 @@ clang_14_mpionly:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1110,7 +1111,7 @@ clang_14_hybrid:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1136,7 +1137,7 @@ clang_14_serial_dbg:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1166,7 +1167,7 @@ clang_14_mpionly_dbg:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1194,7 +1195,7 @@ clang_14_hybrid_dbg:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1218,7 +1219,7 @@ clang_14_hybrid_dbg_sp:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1248,7 +1249,7 @@ clang_15_serial:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1277,7 +1278,7 @@ clang_15_mpionly:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1304,7 +1305,7 @@ clang_15_hybrid:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1330,7 +1331,7 @@ clang_15_serial_dbg:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1360,7 +1361,7 @@ clang_15_mpionly_dbg:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1388,7 +1389,7 @@ clang_15_hybrid_dbg:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1415,7 +1416,7 @@ clang_15_hybrid_dbg_sp:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1445,7 +1446,7 @@ clang_16_serial:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1474,7 +1475,7 @@ clang_16_mpionly:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1501,7 +1502,7 @@ clang_16_hybrid:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1527,7 +1528,7 @@ clang_16_serial_dbg:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1557,7 +1558,7 @@ clang_16_mpionly_dbg:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1585,7 +1586,7 @@ clang_16_hybrid_dbg:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1612,7 +1613,7 @@ clang_16_hybrid_dbg_sp:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1642,7 +1643,7 @@ clang_17_serial:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1671,7 +1672,7 @@ clang_17_mpionly:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1698,7 +1699,7 @@ clang_17_hybrid:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1721,7 +1722,7 @@ clang_17_serial_dbg:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1748,7 +1749,7 @@ clang_17_mpionly_dbg:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1773,7 +1774,7 @@ clang_17_hybrid_dbg:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1798,7 +1799,7 @@ clang_17_hybrid_dbg_sp:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1825,7 +1826,7 @@ aocc_4_serial:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1854,7 +1855,7 @@ aocc_4_mpionly:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1881,7 +1882,7 @@ aocc_4_hybrid:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1904,7 +1905,7 @@ aocc_4_serial_dbg:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1931,7 +1932,7 @@ aocc_4_mpionly_dbg:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1956,7 +1957,7 @@ aocc_4_hybrid_dbg:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
@@ -1980,7 +1981,7 @@ aocc_4_hybrid_dbg_sp:
    before_script:
       - python3 -m venv ci-venv
       - source ci-venv/bin/activate
-      - python3 -m pip install lbmpy==1.3.4 jinja2 pytest
+      - python3 -m pip install lbmpy==1.3.5 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - python3 -m pip list
diff --git a/apps/tutorials/codegen/HeatEquationKernelWithMaterial.py b/apps/tutorials/codegen/HeatEquationKernelWithMaterial.py
index ea80613df51bf5d4ea953162b37f1711c12b35c8..bc76c1f5aa6508e73ace166b1cb66b916c91774f 100644
--- a/apps/tutorials/codegen/HeatEquationKernelWithMaterial.py
+++ b/apps/tutorials/codegen/HeatEquationKernelWithMaterial.py
@@ -4,9 +4,9 @@ import sympy as sp
 import pystencils as ps
 from pystencils_walberla import CodeGeneration, generate_sweep
 script_dir = os.path.dirname(__file__)
-walberla_dir = os.path.join(script_dir, '..', '..', '..', '..', 'walberla')
+walberla_dir = os.path.join(script_dir, '..', '..', '..')
 sys.path.append(walberla_dir)
-# from src.materials.alloys import Ti6Al4V
+from src.materials.alloys import Ti6Al4V
 from src.materials.alloys.SS316L import SS316L
 from src.materials.assignment_converter import assignment_converter
 
diff --git a/extern/pybind11 b/extern/pybind11
index f7b499615e14d70ab098a20deb0cdb3889998a1a..7c33cdc2d39c7b99a122579f53bc94c8eb3332ff 160000
--- a/extern/pybind11
+++ b/extern/pybind11
@@ -1 +1 @@
-Subproject commit f7b499615e14d70ab098a20deb0cdb3889998a1a
+Subproject commit 7c33cdc2d39c7b99a122579f53bc94c8eb3332ff
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index d49a1e63bbcc36307d83ce80e1440048b2ed8207..13e7ac49ebbf92c6718f00e275af7077e7e17539 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -24,7 +24,7 @@ add_subdirectory( blockforest )
 add_subdirectory( boundary )
 add_subdirectory( communication )
 add_subdirectory( core )
-add_subdirectory(gpu)
+add_subdirectory( gpu )
 add_subdirectory( domain_decomposition )
 add_subdirectory( executiontree )
 if ( WALBERLA_BUILD_WITH_FFT AND FFTW3_FOUND )
diff --git a/src/boundary/CMakeLists.txt b/src/boundary/CMakeLists.txt
index c122cbddc6a715ce446071a60c22dc11bb849b03..7da3ebfe360dd08ec1c60035e47944a078285139 100644
--- a/src/boundary/CMakeLists.txt
+++ b/src/boundary/CMakeLists.txt
@@ -14,4 +14,5 @@ target_sources( boundary
       BoundaryHandlingCollection.h
       Boundary.cpp
       BoundaryUID.h
+      ShiftedPeriodicity.h
       )
diff --git a/src/boundary/ShiftedPeriodicity.h b/src/boundary/ShiftedPeriodicity.h
new file mode 100644
index 0000000000000000000000000000000000000000..ce2a32e550d5524c2f291bf901323211ec12f099
--- /dev/null
+++ b/src/boundary/ShiftedPeriodicity.h
@@ -0,0 +1,611 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file ShiftedPeriodicity.h
+//! \ingroup boundary
+//! \author Helen Schottenhamml <helen.schottenhamml@fau.de>
+//
+//======================================================================================================================
+
+#include "blockforest/Block.h"
+#include "blockforest/BlockID.h"
+#include "blockforest/StructuredBlockForest.h"
+
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+#include "core/debug/CheckFunctions.h"
+#include "core/debug/Debug.h"
+#include "core/logging/Logging.h"
+#include "core/math/AABBFwd.h"
+#include "core/math/Vector3.h"
+#include "core/mpi/MPIWrapper.h"
+#include "core/mpi/MPIManager.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/IBlockID.h"
+#include "domain_decomposition/MapPointToPeriodicDomain.h"
+
+#include <array>
+#include <cstdlib>
+#include <iterator>
+#include <map>
+#include <memory>
+#include <tuple>
+#include <vector>
+
+
+namespace walberla {
+namespace boundary {
+
+//*******************************************************************************************************************
+/*!
+ * A periodicity boundary condition that adds a user-defined spatial shift to the field when applied.
+ * This shift can prevent the locking of large-scale turbulent features in the flow direction, see e.g.,
+ * Munters et al. (https://doi.org/10.1063/1.4941912).
+ *
+ * Periodicity defined in the blockforest must be turned off in the normal-direction. Only uniform grids are supported
+ * for the moment. Normal direction and shift direction must not coincide.
+ * Moreover, this boundary condition is only applied at the minimum and maximum of the domain in a given direction, but
+ * cannot be applied in the interior of the domain for now.
+ *
+ * This base class handles the logistics of splitting the field and communication.
+ *
+ * @tparam Field_T Type of the ghost-layer field that is shifted periodically
+ */
+//*******************************************************************************************************************
+template<typename Derived_T, typename Field_T>
+class ShiftedPeriodicityBase {
+
+ public:
+
+   using FieldType = Field_T;
+   using ValueType = typename FieldType::value_type;
+   using ShiftType = int;
+
+   ShiftedPeriodicityBase( const std::weak_ptr<StructuredBlockForest> & blockForest,
+                           const BlockDataID & fieldID, const uint_t fieldGhostLayers,
+                           const uint_t normalDir, const uint_t shiftDir, const ShiftType shiftValue )
+      : blockForest_(blockForest), normalDir_(normalDir), shiftDir_(shiftDir),
+        fieldID_( fieldID ), fieldGhostLayers_(fieldGhostLayers)
+   {
+      auto sbf = blockForest_.lock();
+
+      WALBERLA_ASSERT_NOT_NULLPTR( sbf )
+
+      WALBERLA_CHECK(sbf->storesUniformBlockGrid(), "Periodic shift is currently only implemented for uniform grids.")
+      WALBERLA_CHECK(sbf->containsGlobalBlockInformation(), "For the periodic shift, the blockforest must be constructed to retain global information.")
+
+      shift_ = Vector3<ShiftType>{};
+      auto adaptedShiftValue = shiftValue % ShiftType(sbf->getNumberOfCells(shiftDir_));
+      shift_[shiftDir_] = ShiftType(adaptedShiftValue >= 0 ? adaptedShiftValue : adaptedShiftValue + ShiftType(sbf->getNumberOfCells(shiftDir_)));
+
+      // sanity checks
+      WALBERLA_CHECK_UNEQUAL( shiftDir_, normalDir_, "Direction of periodic shift and boundary normal must not coincide." )
+
+      WALBERLA_CHECK( sbf->isPeriodic(shiftDir_), "Blockforest must be periodic in direction " << shiftDir_ << "!" )
+      WALBERLA_CHECK( !sbf->isPeriodic(normalDir_), "Blockforest must NOT be periodic in direction " << normalDir_ << "!" )
+
+   }
+
+   Vector3<ShiftType> shift() const { return shift_; }
+   uint_t shiftDirection() const { return shiftDir_; }
+   uint_t normalDirection() const { return normalDir_; }
+
+   void operator()() {
+
+      auto mpiInstance = mpi::MPIManager::instance();
+      const auto currentRank = numeric_cast<mpi::MPIRank>(mpiInstance->rank());
+
+      const auto sbf = blockForest_.lock();
+      WALBERLA_ASSERT_NOT_NULLPTR( sbf )
+
+      // only setup send and receive information once at the beginning
+      if(!setupPeriodicity_){
+         setupPeriodicity();
+         setupPeriodicity_ = true;
+      }
+
+      // set up local to avoid temporary fields; key is unique tag to ensure correct communication
+      // if several messages have the same source and destination
+      // thanks to the unique tag, the same buffer can be used for both local and MPI communication
+      std::map<int, std::vector<ValueType>> buffer;
+      std::map<BlockID, std::map<int, MPI_Request>> sendRequests;
+      std::map<BlockID, std::map<int, MPI_Request>> recvRequests;
+
+      std::vector<IBlock*> localBlocks{};
+      sbf->getBlocks(localBlocks);
+
+      /// packing and schedule sends
+
+      for( auto block : localBlocks ) {
+
+         const auto blockID = dynamic_cast<Block*>(block)->getId();
+
+         WALBERLA_ASSERT_EQUAL(sendAABBs_[blockID].size(), recvAABBs_[blockID].size() )
+
+         for(uint_t i = 0; i < sendAABBs_[blockID].size(); ++i) {
+
+            auto & sendInfo = sendAABBs_[blockID][i];
+
+            const auto sendRank = std::get<0>(sendInfo);
+            const auto sendTag = std::get<3>(sendInfo);
+
+            const auto sendCI = std::get<2>(sendInfo);
+
+            const auto sendSize = sendCI.numCells() * uint_c(fSize_);
+
+            buffer[sendTag].resize(sendSize);
+            static_cast<Derived_T*>(this)->packBuffer(block, sendCI, buffer[sendTag]);
+
+            WALBERLA_MPI_SECTION() {
+               // schedule sends if MPI
+               if (sendRank != currentRank)
+               {
+                  MPI_Isend(buffer[sendTag].data(), mpi::MPISize(buffer[sendTag].size() * sizeof(ValueType)), MPI_BYTE,
+                            sendRank, sendTag, mpiInstance->comm(), &sendRequests[blockID][sendTag]);
+               }
+            }
+
+         }
+
+      }
+
+      /// schedule receives
+
+      for( auto block : localBlocks ) {
+
+         const auto blockID = dynamic_cast<Block*>(block)->getId();
+
+         WALBERLA_ASSERT_EQUAL(sendAABBs_[blockID].size(), recvAABBs_[blockID].size() )
+
+         for(uint_t i = 0; i < recvAABBs_[blockID].size(); ++i) {
+
+            auto & recvInfo = recvAABBs_[blockID][i];
+
+            const auto recvRank = std::get<0>(recvInfo);
+            const auto recvTag   = std::get<3>(recvInfo);
+            const auto recvCI = std::get<2>(recvInfo);
+
+            WALBERLA_MPI_SECTION() {
+               // do not schedule receives for local communication
+               if (recvRank != currentRank) {
+                  const auto recvSize = recvCI.numCells() * uint_c(fSize_);
+                  buffer[recvTag].resize(recvSize);
+
+                  // Schedule receives
+                  MPI_Irecv(buffer[recvTag].data(), mpi::MPISize(buffer[recvTag].size() * sizeof(ValueType)), MPI_BYTE,
+                            recvRank, recvTag, mpiInstance->comm(), &recvRequests[blockID][recvTag]);
+               }
+            }
+
+         }
+
+      }
+
+      /// unpacking
+
+      for( auto block : localBlocks ) {
+
+         const auto blockID = dynamic_cast<Block*>(block)->getId();
+
+         for(uint_t i = 0; i < recvAABBs_[blockID].size(); ++i) {
+
+            auto & recvInfo = recvAABBs_[blockID][i];
+
+            const auto recvRank = std::get<0>(recvInfo);
+            const auto recvTag = std::get<3>(recvInfo);
+
+            const auto recvCI = std::get<2>(recvInfo);
+
+            if(recvRank == currentRank) {
+               WALBERLA_ASSERT_GREATER(buffer.count(recvTag), 0)
+               static_cast<Derived_T*>(this)->unpackBuffer(block, recvCI, buffer[recvTag]);
+            } else {
+               WALBERLA_MPI_SECTION() {
+                  MPI_Status status;
+                  MPI_Wait(&recvRequests[blockID][recvTag], &status);
+
+                  WALBERLA_ASSERT_GREATER(buffer.count(recvTag), 0)
+                  static_cast< Derived_T* >(this)->unpackBuffer(block, recvCI, buffer[recvTag]);
+               }
+            }
+
+         }
+
+      }
+
+      WALBERLA_MPI_SECTION() {
+         // wait for all communication to be finished
+         for (auto& block : localBlocks)
+         {
+            const auto blockID = dynamic_cast< Block* >(block)->getId();
+
+            if (sendRequests[blockID].empty()) return;
+
+            std::vector< MPI_Request > v;
+            std::transform(sendRequests[blockID].begin(), sendRequests[blockID].end(), std::back_inserter(v),
+                           second(sendRequests[blockID]));
+            MPI_Waitall(int_c(sendRequests[blockID].size()), v.data(), MPI_STATUSES_IGNORE);
+         }
+      }
+
+   }
+
+ private:
+
+   /*!
+    * Creates send and receive information (source, target rank, source / target blocks, source and destination CI
+    * (here in form of an AABB), and unique communication tag) per block if the shift does not coincide with the
+    * block size.
+    *
+    * If the shift does not coincide with the block size, each block is split into two send sub-blocks and two receive
+    * sub-blocks whose source / target block will differ (maybe also the rank).
+    * Afterwards, all the information necessary for the communication is determined and saved in the corresponding
+    * data structure.
+    */
+   void processDoubleAABB( const AABB & originalAABB, const std::shared_ptr<StructuredBlockForest> & sbf,
+                          const real_t nGL, const BlockID & blockID, const int normalShift ) {
+
+      WALBERLA_ASSERT(normalShift == -1 || normalShift == 1)
+
+      const auto localShift = normalShift == 1 ? shift_ : -shift_;
+
+      const auto offset = ShiftType(int_c(shift_[shiftDir_]) % int_c(sbf->getNumberOfCellsPerBlock(shiftDir_)));
+      Vector3<ShiftType> normalShiftVector{};
+      normalShiftVector[normalDir_] = ShiftType(normalShift * int_c(sbf->getNumberOfCellsPerBlock(normalDir_)));
+
+      const auto remDir = uint_t(3) - normalDir_ - shiftDir_;
+
+      AABB aabb1 = originalAABB;
+      AABB aabb2 = originalAABB;
+
+      if( normalShift == 1 ) {
+         aabb1.setAxisBounds(shiftDir_, aabb1.min(shiftDir_), aabb1.max(shiftDir_) - real_c(offset));
+         aabb2.setAxisBounds(shiftDir_, aabb2.max(shiftDir_) - real_c(offset), aabb2.max(shiftDir_));
+      } else {
+         aabb1.setAxisBounds(shiftDir_, aabb1.min(shiftDir_), aabb1.min(shiftDir_) + real_c(offset));
+         aabb2.setAxisBounds(shiftDir_, aabb2.min(shiftDir_) + real_c(offset), aabb2.max(shiftDir_));
+      }
+
+      auto center1 = aabb1.center();
+      auto center2 = aabb2.center();
+
+      // account for ghost layers
+      aabb1.setAxisBounds(remDir, aabb1.min(remDir) - nGL, aabb1.max(remDir) + nGL);
+      aabb2.setAxisBounds(remDir, aabb2.min(remDir) - nGL, aabb2.max(remDir) + nGL);
+
+      auto localSendAABB1 = aabb1;
+      auto localRecvAABB1 = aabb1;
+      auto localSendAABB2 = aabb2;
+      auto localRecvAABB2 = aabb2;
+
+      localSendAABB1.setAxisBounds(shiftDir_, localSendAABB1.min(shiftDir_), localSendAABB1.max(shiftDir_) + nGL);
+      localSendAABB2.setAxisBounds(shiftDir_, localSendAABB2.min(shiftDir_) - nGL, localSendAABB2.max(shiftDir_));
+      localRecvAABB1.setAxisBounds(shiftDir_, localRecvAABB1.min(shiftDir_) - nGL, localRecvAABB1.max(shiftDir_));
+      localRecvAABB2.setAxisBounds(shiftDir_, localRecvAABB2.min(shiftDir_), localRecvAABB2.max(shiftDir_) + nGL);
+
+      if(normalShift == 1) { // at maximum of domain -> send inner layers, receive ghost layers
+         localSendAABB1.setAxisBounds(normalDir_, localSendAABB1.max(normalDir_) - nGL, localSendAABB1.max(normalDir_));
+         localSendAABB2.setAxisBounds(normalDir_, localSendAABB2.max(normalDir_) - nGL, localSendAABB2.max(normalDir_));
+         localRecvAABB1.setAxisBounds(normalDir_, localRecvAABB1.max(normalDir_), localRecvAABB1.max(normalDir_) + nGL);
+         localRecvAABB2.setAxisBounds(normalDir_, localRecvAABB2.max(normalDir_), localRecvAABB2.max(normalDir_) + nGL);
+      } else { // at minimum of domain -> send inner layers, receive ghost layers
+         localSendAABB1.setAxisBounds(normalDir_, localSendAABB1.min(normalDir_), localSendAABB1.min(normalDir_) + nGL);
+         localSendAABB2.setAxisBounds(normalDir_, localSendAABB2.min(normalDir_), localSendAABB2.min(normalDir_) + nGL);
+         localRecvAABB1.setAxisBounds(normalDir_, localRecvAABB1.min(normalDir_) - nGL, localRecvAABB1.min(normalDir_));
+         localRecvAABB2.setAxisBounds(normalDir_, localRecvAABB2.min(normalDir_) - nGL, localRecvAABB2.min(normalDir_));
+      }
+
+      WALBERLA_ASSERT_FLOAT_EQUAL(localSendAABB1.volume(), localRecvAABB1.volume())
+      WALBERLA_ASSERT_FLOAT_EQUAL(localSendAABB2.volume(), localRecvAABB2.volume())
+
+      center1 += (localShift + normalShiftVector);
+      center2 += (localShift + normalShiftVector);
+
+      std::array<bool, 3> virtualPeriodicity{false};
+      virtualPeriodicity[normalDir_] = true;
+      virtualPeriodicity[shiftDir_] = true;
+
+      domain_decomposition::mapPointToPeriodicDomain( virtualPeriodicity, sbf->getDomain(), center1 );
+      domain_decomposition::mapPointToPeriodicDomain( virtualPeriodicity, sbf->getDomain(), center2 );
+
+      uint_t rank1{};
+      uint_t rank2{};
+
+      BlockID id1{};
+      BlockID id2{};
+
+      const auto blockInformation = sbf->getBlockInformation();
+      WALBERLA_ASSERT(blockInformation.active())
+
+      blockInformation.getProcess(rank1, center1[0], center1[1], center1[2]);
+      blockInformation.getProcess(rank2, center2[0], center2[1], center2[2]);
+
+      blockInformation.getId(id1, center1[0], center1[1], center1[2]);
+      blockInformation.getId(id2, center2[0], center2[1], center2[2]);
+
+      // define unique send / receive tags for communication
+
+      const int atMaxTagSend = normalShift < 0 ? 0 : 1;
+      const int atMaxTagRecv = normalShift < 0 ? 1 : 0;
+
+      const int sendTag1 = ((int_c(blockID.getID()) + int_c(id1.getID() * numGlobalBlocks_)) * 2 + atMaxTagSend) * 2 + 0;
+      const int sendTag2 = ((int_c(blockID.getID()) + int_c(id2.getID() * numGlobalBlocks_)) * 2 + atMaxTagSend) * 2 + 1;
+      const int recvTag2 = ((int_c(id2.getID()) + int_c(blockID.getID() * numGlobalBlocks_)) * 2 + atMaxTagRecv) * 2 + 0;
+      const int recvTag1 = ((int_c(id1.getID()) + int_c(blockID.getID() * numGlobalBlocks_)) * 2 + atMaxTagRecv) * 2 + 1;
+
+      auto block = sbf->getBlock(blockID);
+
+      sendAABBs_[blockID].emplace_back(mpi::MPIRank(rank1), id1, globalAABBToLocalCI(localSendAABB1, sbf, block), sendTag1);
+      sendAABBs_[blockID].emplace_back(mpi::MPIRank(rank2), id2, globalAABBToLocalCI(localSendAABB2, sbf, block), sendTag2);
+      recvAABBs_[blockID].emplace_back(mpi::MPIRank(rank2), id2, globalAABBToLocalCI(localRecvAABB2, sbf, block), recvTag2);
+      recvAABBs_[blockID].emplace_back(mpi::MPIRank(rank1), id1, globalAABBToLocalCI(localRecvAABB1, sbf, block), recvTag1);
+
+      WALBERLA_LOG_DETAIL("blockID = " << blockID.getID() << ", normalShift = " << normalShift
+                                       << "\n\tsendRank1 = " << rank1 << "\tsendID1 = " << id1.getID() << "\tsendTag1 = " << sendTag1 << "\taabb = " << localSendAABB1
+                                       << "\n\tsendRank2 = " << rank2 << "\tsendID2 = " << id2.getID() << "\tsendTag2 = " << sendTag2 << "\taabb = " << localSendAABB2
+                                       << "\n\trecvRank1 = " << rank1 << "\trecvID1 = " << id1.getID() << "\trecvTag1 = " << recvTag1 << "\taabb = " << localRecvAABB1
+                                       << "\n\trecvRank2 = " << rank2 << "\trecvID2 = " << id2.getID() << "\trecvTag2 = " << recvTag2 << "\taabb = " << localRecvAABB2
+      )
+   }
+
+   /*!
+    * Does the same things as `processDoubleAABB` but assuming that the shift is a multiple of the block size, i.e.,
+    * every field slice is shifted one or several entire blocks. In this case, every block only has ONE send and ONE
+    * receive block whose information must be stored.
+    */
+   void processSingleAABB( const AABB & originalAABB, const std::shared_ptr<StructuredBlockForest> & sbf,
+                          const real_t nGL, const BlockID & blockID, const int normalShift ) {
+
+      WALBERLA_ASSERT(normalShift == -1 || normalShift == 1)
+
+      const auto localShift = normalShift == 1 ? shift_ : -shift_;
+
+      Vector3<ShiftType> normalShiftVector{};
+      normalShiftVector[normalDir_] = ShiftType(normalShift * int_c(sbf->getNumberOfCellsPerBlock(normalDir_)));
+
+      // aabb
+      auto aabb = originalAABB.getExtended(nGL);
+      auto center = aabb.center();
+
+      // receive from the interior of domain in ghost layers
+      auto localSendAABB = aabb;
+      auto localRecvAABB = aabb;
+
+      if(normalShift == 1) { // at maximum of domain -> send inner layers, receive ghost layers
+         localSendAABB.setAxisBounds(normalDir_, localSendAABB.max(normalDir_) - 2 * nGL, localSendAABB.max(normalDir_) - nGL);
+         localRecvAABB.setAxisBounds(normalDir_, localRecvAABB.max(normalDir_) - nGL, localRecvAABB.max(normalDir_));
+      } else { // at minimum of domain -> send inner layers, receive ghost layers
+         localSendAABB.setAxisBounds(normalDir_, localSendAABB.min(normalDir_) + nGL, localSendAABB.min(normalDir_) + 2 * nGL);
+         localRecvAABB.setAxisBounds(normalDir_, localRecvAABB.min(normalDir_), localRecvAABB.min(normalDir_) + nGL);
+      }
+
+      WALBERLA_ASSERT_FLOAT_EQUAL(localSendAABB.volume(), localRecvAABB.volume())
+
+      center += (localShift + normalShiftVector);
+
+      std::array<bool, 3> virtualPeriodicity{false};
+      virtualPeriodicity[normalDir_] = true;
+      virtualPeriodicity[shiftDir_] = true;
+
+      domain_decomposition::mapPointToPeriodicDomain( virtualPeriodicity, sbf->getDomain(), center );
+
+      uint_t rank{};
+
+      BlockID id{};
+
+      const auto blockInformation = sbf->getBlockInformation();
+      WALBERLA_ASSERT(blockInformation.active())
+
+      blockInformation.getProcess(rank, center[0], center[1], center[2]);
+
+      blockInformation.getId(id, center[0], center[1], center[2]);
+
+      // define unique send / receive tags for communication
+
+      const int atMaxTagSend = normalShift < 0 ? 0 : 1;
+      const int atMaxTagRecv = normalShift < 0 ? 1 : 0;
+
+      const int sendTag = ((int_c(blockID.getID()) + int_c(id.getID() * numGlobalBlocks_)) * 2 + atMaxTagSend) * 2 + 0;
+      const int recvTag = ((int_c(id.getID()) + int_c(blockID.getID() * numGlobalBlocks_)) * 2 + atMaxTagRecv) * 2 + 0;
+
+      auto block = sbf->getBlock(blockID);
+
+      sendAABBs_[blockID].emplace_back(mpi::MPIRank(rank), id, globalAABBToLocalCI(localSendAABB, sbf, block), sendTag);
+      recvAABBs_[blockID].emplace_back(mpi::MPIRank(rank), id, globalAABBToLocalCI(localRecvAABB, sbf, block), recvTag);
+
+      WALBERLA_LOG_DETAIL("blockID = " << blockID.getID() << ", normalShift = " << normalShift
+                                       << "\n\tsendRank = " << rank << "\tsendID = " << id.getID() << "\tsendTag = " << sendTag << "\taabb = " << localSendAABB
+                                       << "\n\trecvRank = " << rank << "\trecvID = " << id.getID() << "\trecvTag = " << recvTag << "\taabb = " << localRecvAABB
+      )
+   }
+
+   /*!
+    * Determines which blocks are participating in the boundary condition / communication, and calls the appropriate
+    * functions to extract and save the communication information.
+    */
+   void setupPeriodicity() {
+
+      const auto sbf = blockForest_.lock();
+      WALBERLA_ASSERT_NOT_NULLPTR( sbf )
+
+      auto & blockInformation = sbf->getBlockInformation();
+      WALBERLA_ASSERT(blockInformation.active())
+      std::vector<std::shared_ptr<IBlockID>> globalBlocks;
+      blockInformation.getAllBlocks(globalBlocks);
+      numGlobalBlocks_ = globalBlocks.size();
+
+      // get blocks of current processor
+      std::vector<IBlock*> localBlocks;
+      sbf->getBlocks(localBlocks);
+
+      const auto nGL = real_c(fieldGhostLayers_);
+
+      const bool shiftWholeBlock = (shift_[shiftDir_] % ShiftType(sbf->getNumberOfCells(*localBlocks[0], shiftDir_))) == 0;
+
+      // get f-size
+      {
+         auto* field = localBlocks[0]->getData<FieldType>(fieldID_);
+         WALBERLA_ASSERT_NOT_NULLPTR(field)
+         fSize_ = cell_idx_c(field->fSize());
+      }
+
+      for( auto block : localBlocks ) {
+
+         // get minimal ghost layer region (in normal direction)
+         const auto blockAABB = block->getAABB();
+         const auto blockID = dynamic_cast<Block*>(block)->getId();
+
+         const bool atMin = sbf->atDomainMinBorder(normalDir_, *block);
+         const bool atMax = sbf->atDomainMaxBorder(normalDir_, *block);
+
+         if(atMin) {
+            if(shiftWholeBlock) {
+               processSingleAABB(blockAABB, sbf, nGL, blockID, -1);
+            } else {
+               processDoubleAABB(blockAABB, sbf, nGL, blockID, -1);
+            }
+         }
+         if(atMax)  {
+            if(shiftWholeBlock) {
+               processSingleAABB(blockAABB, sbf, nGL, blockID, +1);
+            } else {
+               processDoubleAABB(blockAABB, sbf, nGL, blockID, +1);
+            }
+         }
+
+      }
+
+   }
+
+   Vector3<cell_idx_t> toCellVector( const Vector3<real_t> & point ) {
+      return Vector3<cell_idx_t >{ cell_idx_c(point[0]), cell_idx_c(point[1]), cell_idx_c(point[2]) };
+   }
+
+   CellInterval globalAABBToLocalCI( const AABB & aabb, const std::shared_ptr<StructuredBlockForest> & sbf, IBlock * const block ) {
+      auto globalCI = CellInterval{toCellVector(aabb.min()), toCellVector(aabb.max()) - Vector3<cell_idx_t>(1, 1, 1)};
+      CellInterval localCI;
+      sbf->transformGlobalToBlockLocalCellInterval(localCI, *block, globalCI);
+
+      return localCI;
+   }
+
+   template< typename tPair >
+   struct second_t {
+      typename tPair::second_type operator()( const tPair& p ) const { return p.second; }
+   };
+   template< typename tMap >
+   second_t< typename tMap::value_type > second( const tMap& ) { return second_t< typename tMap::value_type >(); }
+
+
+   std::weak_ptr<StructuredBlockForest> blockForest_;
+
+   uint_t normalDir_;
+   uint_t shiftDir_;
+   Vector3<ShiftType> shift_;
+
+   // for each local block, stores the ranks where to send / receive, the corresponding block IDs,
+   // the local AABBs that need to be packed / unpacked, and a unique tag for communication
+   std::map<BlockID, std::vector<std::tuple<mpi::MPIRank, BlockID, CellInterval, int>>> sendAABBs_{};
+   std::map<BlockID, std::vector<std::tuple<mpi::MPIRank, BlockID, CellInterval, int>>> recvAABBs_{};
+
+   bool setupPeriodicity_{false};
+   uint_t numGlobalBlocks_{};
+
+ protected:
+
+   const BlockDataID fieldID_;
+
+   const uint_t fieldGhostLayers_;
+   cell_idx_t fSize_;
+
+}; // class ShiftedPeriodicityBase
+
+
+//*******************************************************************************************************************
+/*!
+ * A periodicity boundary condition that adds a user-defined spatial shift to the field when applied.
+ * This shift can prevent the locking of large-scale turbulent features in the flow direction, see e.g.,
+ * Munters et al. (https://doi.org/10.1063/1.4941912).
+ *
+ * Periodicity defined in the blockforest must be turned off in the normal-direction.
+ *
+ * This class handles the CPU-specific packing and unpacking of the communication buffers.
+ *
+ * @tparam GhostLayerField_T Type of the ghost-layer field that is shifted periodically
+ */
+//*******************************************************************************************************************
+template<typename GhostLayerField_T>
+class ShiftedPeriodicity : public ShiftedPeriodicityBase<ShiftedPeriodicity<GhostLayerField_T>, GhostLayerField_T> {
+
+   using Base = ShiftedPeriodicityBase<ShiftedPeriodicity<GhostLayerField_T>, GhostLayerField_T>;
+   friend Base;
+
+ public:
+
+   using ValueType = typename GhostLayerField_T::value_type;
+   using ShiftType = typename Base::ShiftType;
+
+   ShiftedPeriodicity( const std::weak_ptr<StructuredBlockForest> & blockForest,
+                       const BlockDataID & fieldID, const uint_t fieldGhostLayers,
+                      const uint_t normalDir, const uint_t shiftDir, const ShiftType shiftValue )
+      : Base( blockForest, fieldID, fieldGhostLayers, normalDir, shiftDir, shiftValue )
+   {}
+
+ private:
+
+   void packBuffer(IBlock * const block, const CellInterval & ci,
+                   std::vector<ValueType> & buffer) {
+
+      // get field
+      auto field = block->getData<GhostLayerField_T>(this->fieldID_);
+      WALBERLA_ASSERT_NOT_NULLPTR(field)
+
+      auto bufferIt = buffer.begin();
+
+      // forward iterate over ci and add values to value vector
+      for(auto cellIt = ci.begin(); cellIt != ci.end(); ++cellIt) {
+         for(cell_idx_t f = 0; f < this->fSize_; ++f, ++bufferIt) {
+            WALBERLA_ASSERT(field->coordinatesValid(cellIt->x(), cellIt->y(), cellIt->z(), f))
+            *bufferIt = field->get(*cellIt, f);
+         }
+      }
+
+   }
+
+   void unpackBuffer(IBlock* const block, const CellInterval & ci,
+                     const std::vector<ValueType> & buffer) {
+
+      // get field
+      auto field = block->getData<GhostLayerField_T>(this->fieldID_);
+      WALBERLA_ASSERT_NOT_NULLPTR(field)
+
+      auto bufferIt = buffer.begin();
+
+      // forward iterate over ci and add values to value vector
+      for(auto cellIt = ci.begin(); cellIt != ci.end(); ++cellIt) {
+         for(cell_idx_t f = 0; f < this->fSize_; ++f, ++bufferIt) {
+            WALBERLA_ASSERT(field->coordinatesValid(cellIt->x(), cellIt->y(), cellIt->z(), f))
+            field->get(*cellIt, f) = *bufferIt;
+         }
+      }
+
+   }
+
+}; // class ShiftedPeriodicity
+
+} // namespace lbm
+} // namespace walberla
diff --git a/src/gpu/CMakeLists.txt b/src/gpu/CMakeLists.txt
index fb6810d4e9241c476967bd430a9440b7b6eee86f..e870cdb5de4b783d1866fce23297e21f6bedff86 100644
--- a/src/gpu/CMakeLists.txt
+++ b/src/gpu/CMakeLists.txt
@@ -38,6 +38,8 @@ target_sources( gpu
       ParallelStreams.h
       GPURAII.h
       DeviceSelectMPI.cpp
+      ShiftedPeriodicity.cu
+      ShiftedPeriodicity.h
       )
 
 # sources only for CUDA
diff --git a/src/gpu/FieldAccessor.h b/src/gpu/FieldAccessor.h
index d737983d1aa5f289d624cd508eea8a7969a5fe69..e34d9c3c890c2bc316f629fa97f83453bb5c5d68 100644
--- a/src/gpu/FieldAccessor.h
+++ b/src/gpu/FieldAccessor.h
@@ -56,29 +56,29 @@ namespace gpu
               fOffset_(fOffset), indexingScheme_(indexingScheme )
       {}
 
-      __device__ void set( uint3 blockIdx, uint3 threadIdx )
+      __device__ void set( uint3 _blockIdx, uint3 _threadIdx )
       {
          switch ( indexingScheme_)
          {
-            case FZYX: ptr_ += blockIdx.z * fOffset_ + blockIdx.y * zOffset_ + blockIdx.x * yOffset_ + threadIdx.x * xOffset_; break;
-            case FZY : ptr_ +=                         blockIdx.y * fOffset_ + blockIdx.x * zOffset_ + threadIdx.x * yOffset_; break;
-            case FZ  : ptr_ +=                                                 blockIdx.x * fOffset_ + threadIdx.x * zOffset_; break;
-            case F   : ptr_ +=                                                                         threadIdx.x * fOffset_; break;
-
-            case ZYXF: ptr_ += blockIdx.z * zOffset_ + blockIdx.y * yOffset_ + blockIdx.x * xOffset_ + threadIdx.x * fOffset_; break;
-            case ZYX : ptr_ +=                         blockIdx.y * zOffset_ + blockIdx.x * yOffset_ + threadIdx.x * xOffset_; break;
-            case ZY  : ptr_ +=                                                 blockIdx.x * zOffset_ + threadIdx.x * yOffset_; break;
-            case Z   : ptr_ +=                                                                         threadIdx.x * zOffset_; break;
+            case FZYX: ptr_ += _blockIdx.z * fOffset_ + _blockIdx.y * zOffset_ + _blockIdx.x * yOffset_ + _threadIdx.x * xOffset_; break;
+            case FZY : ptr_ +=                          _blockIdx.y * fOffset_ + _blockIdx.x * zOffset_ + _threadIdx.x * yOffset_; break;
+            case FZ  : ptr_ +=                                                   _blockIdx.x * fOffset_ + _threadIdx.x * zOffset_; break;
+            case F   : ptr_ +=                                                                            _threadIdx.x * fOffset_; break;
+
+            case ZYXF: ptr_ += _blockIdx.z * zOffset_ + _blockIdx.y * yOffset_ + _blockIdx.x * xOffset_ + _threadIdx.x * fOffset_; break;
+            case ZYX : ptr_ +=                          _blockIdx.y * zOffset_ + _blockIdx.x * yOffset_ + _threadIdx.x * xOffset_; break;
+            case ZY  : ptr_ +=                                                   _blockIdx.x * zOffset_ + _threadIdx.x * yOffset_; break;
+            case Z   : ptr_ +=                                                                            _threadIdx.x * zOffset_; break;
          }
       }
 
 
-      __device__ uint_t getLinearIndex( uint3 blockIdx, uint3 threadIdx, uint3 gridDim, uint3 blockDim )
+      __device__ uint_t getLinearIndex( uint3 _blockIdx, uint3 _threadIdx, uint3 _gridDim, uint3 _blockDim )
       {
-         return threadIdx.x                              +
-                blockIdx.x * blockDim.x                  +
-                blockIdx.y * blockDim.x * gridDim.x      +
-                blockIdx.z * blockDim.x * gridDim.x * gridDim.y ;
+         return _threadIdx.x                                +
+                _blockIdx.x * _blockDim.x                   +
+                _blockIdx.y * _blockDim.x * _gridDim.x      +
+                _blockIdx.z * _blockDim.x * _gridDim.x * _gridDim.y ;
       }
 
       // This is always true for this specific field indexing class.
diff --git a/src/gpu/FieldAccessor3D.h b/src/gpu/FieldAccessor3D.h
index 66e95f7242ce0c183ab261f7dcd1600a57df10bb..5f32a43c4c110bd0589dc98e65c6cb7b7e8f7bc5 100644
--- a/src/gpu/FieldAccessor3D.h
+++ b/src/gpu/FieldAccessor3D.h
@@ -38,17 +38,17 @@ namespace gpu
                        uint_t yOffset,
                        uint_t zOffset,
                        uint_t fOffset,
-                       const dim3 & idxDim,
-                       const dim3 & blockDim )
+                       const dim3 & _idxDim,
+                       const dim3 & _blockDim )
             : ptr_( ptr ), xOffset_( xOffset ), yOffset_( yOffset ), zOffset_( zOffset ), fOffset_( fOffset ),
-              idxDim_( idxDim ), blockDim_( blockDim )
+              idxDim_( _idxDim ), blockDim_( _blockDim )
       {}
 
-      __device__ __forceinline__ void set( const uint3& blockIdx, const uint3& threadIdx )
+      __device__ __forceinline__ void set( const uint3& _blockIdx, const uint3& _threadIdx )
       {
-         uint_t x = blockIdx.x * blockDim_.x + threadIdx.x;
-         uint_t y = blockIdx.y * blockDim_.y + threadIdx.y;
-         uint_t z = blockIdx.z * blockDim_.z + threadIdx.z;
+         uint_t x = _blockIdx.x * blockDim_.x + _threadIdx.x;
+         uint_t y = _blockIdx.y * blockDim_.y + _threadIdx.y;
+         uint_t z = _blockIdx.z * blockDim_.z + _threadIdx.z;
 
          if ( x < idxDim_.x && y < idxDim_.y && z < idxDim_.z )
          {
diff --git a/src/gpu/FieldAccessorXYZ.h b/src/gpu/FieldAccessorXYZ.h
index d9046a783d3b5c073b03527332cff553edf1e99b..73c7a0a051929b17434b7d79c2a0fd00c3728507 100644
--- a/src/gpu/FieldAccessorXYZ.h
+++ b/src/gpu/FieldAccessorXYZ.h
@@ -37,11 +37,11 @@ namespace gpu
             : ptr_(ptr), xOffset_(xOffset), yOffset_(yOffset), zOffset_(zOffset), fOffset_(fOffset)
       {}
 
-      __device__ void set( uint3 blockIdx, uint3 threadIdx )
+      __device__ void set( uint3 _blockIdx, uint3 _threadIdx )
       {
-         ptr_ += threadIdx.x * xOffset_ +
-                 blockIdx.x  * yOffset_ +
-                 blockIdx.y  * zOffset_ ;
+         ptr_ += _threadIdx.x * xOffset_ +
+                 _blockIdx.x  * yOffset_ +
+                 _blockIdx.y  * zOffset_ ;
       }
 
       __device__ T & get()       { return * (T*)(ptr_);                }
diff --git a/src/gpu/ShiftedPeriodicity.cu b/src/gpu/ShiftedPeriodicity.cu
new file mode 100644
index 0000000000000000000000000000000000000000..d2ae3f70d8aa1336eade55b42cbc32b2695fc4bc
--- /dev/null
+++ b/src/gpu/ShiftedPeriodicity.cu
@@ -0,0 +1,53 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file ShiftedPeriodicity.cu
+//! \ingroup gpu
+//! \author Helen Schottenhamml <helen.schottenhamml@fau.de>
+//
+//======================================================================================================================
+
+#include "gpu/ShiftedPeriodicity.h"
+
+namespace walberla {
+namespace gpu
+{
+
+namespace internal {
+#ifdef WALBERLA_BUILD_WITH_GPU_SUPPORT
+   __global__ void packBufferGPU( gpu::FieldAccessor<real_t> fa, real_t * const buffer ) {
+      fa.set(blockIdx, threadIdx);
+      if(fa.isValidPosition()) {
+         buffer[fa.getLinearIndex(blockIdx, threadIdx, gridDim, blockDim)] = fa.get();
+      }
+   }
+   __global__ void unpackBufferGPU( gpu::FieldAccessor<real_t> fa, const real_t * const buffer ) {
+      fa.set(blockIdx, threadIdx);
+      if(fa.isValidPosition()) {
+         fa.get() = buffer[fa.getLinearIndex(blockIdx, threadIdx, gridDim, blockDim)];
+      }
+   }
+#else
+   __global__ void packBufferGPU( gpu::FieldAccessor<real_t>, real_t * const ) {
+      WALBERLA_ABORT("gpu/ShiftedPeriodicity only supported when built with GPU support. Please use boundary/ShiftedPeriodicity on CPUs")
+   }
+   __global__ void unpackBufferGPU( gpu::FieldAccessor<real_t>, const real_t * const ) {
+      WALBERLA_ABORT("gpu/ShiftedPeriodicity only supported when built with GPU support. Please use boundary/ShiftedPeriodicity on CPUs")
+   }
+#endif
+}
+
+} // namespace gpu
+} // namespace walberla
diff --git a/src/gpu/ShiftedPeriodicity.h b/src/gpu/ShiftedPeriodicity.h
new file mode 100644
index 0000000000000000000000000000000000000000..cf46577c21e688aaa375f15eddfd969e4eb6f669
--- /dev/null
+++ b/src/gpu/ShiftedPeriodicity.h
@@ -0,0 +1,144 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file ShiftedPeriodicity.h
+//! \ingroup gpu
+//! \author Helen Schottenhamml <helen.schottenhamml@fau.de>
+//
+//======================================================================================================================
+
+#include "blockforest/StructuredBlockForest.h"
+
+#include "boundary/ShiftedPeriodicity.h"
+
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+#include "core/debug/Debug.h"
+#include "core/math/Vector3.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+
+#include "gpu/DeviceWrapper.h"
+#include "gpu/FieldAccessor.h"
+#include "gpu/FieldIndexing.h"
+#include "gpu/GPUField.h"
+#include "gpu/GPUWrapper.h"
+#include "gpu/Kernel.h"
+
+#include <cstdlib>
+#include <device_launch_parameters.h>
+#include <memory>
+#include <vector>
+
+#include "ErrorChecking.h"
+
+namespace walberla {
+namespace gpu
+{
+
+namespace internal {
+   // GPU kernels - can be extended for other data types
+   __global__ void packBufferGPU( gpu::FieldAccessor<real_t> fa, real_t * const buffer );
+   __global__ void unpackBufferGPU( gpu::FieldAccessor<real_t> fa, const real_t * const buffer );
+}
+
+//*******************************************************************************************************************
+/*!
+ * A periodicity boundary condition that adds a user-defined spatial shift to the field when applied.
+ * This shift can prevent the locking of large-scale turbulent features in the flow direction, see e.g.,
+ * Munters et al. (https://doi.org/10.1063/1.4941912).
+ *
+ * Periodicity defined in the blockforest must be turned off in the normal-direction.
+ *
+ * This class handles the GPU-specific packing and unpacking of the communication buffers.
+ *
+ * @tparam GhostLayerField_T Type of the ghost-layer field that is shifted periodically
+ */
+//*******************************************************************************************************************
+template< typename GPUField_T >
+class ShiftedPeriodicityGPU : public boundary::ShiftedPeriodicityBase<ShiftedPeriodicityGPU<GPUField_T>, GPUField_T> {
+
+   using Base = boundary::ShiftedPeriodicityBase<ShiftedPeriodicityGPU<GPUField_T>, GPUField_T>;
+   friend Base;
+
+ public:
+
+   using ValueType = typename GPUField_T::value_type;
+   using ShiftType = typename Base::ShiftType;
+   using FieldIdx_T = gpu::FieldIndexing<ValueType>;
+
+   ShiftedPeriodicityGPU(const std::weak_ptr< StructuredBlockForest >& blockForest,
+                         const BlockDataID& fieldID, const uint_t fieldGhostLayers,
+                         const uint_t normalDir, const uint_t shiftDir, const ShiftType shiftValue)
+      : Base(blockForest, fieldID, fieldGhostLayers, normalDir, shiftDir, shiftValue)
+   {}
+
+
+ private:
+
+   void packBuffer(IBlock* const block, const CellInterval& ci, std::vector< ValueType >& h_buffer) {
+
+      // get field
+      auto d_field = block->getData< GPUField_T >(this->fieldID_);
+      WALBERLA_ASSERT_NOT_NULLPTR(d_field)
+
+      const uint_t nValues = ci.numCells() * uint_c(this->fSize_);
+
+      // create GPU buffer
+      ValueType * d_buffer{};
+      WALBERLA_GPU_CHECK(gpuMalloc(&d_buffer, nValues * sizeof(ValueType)))
+
+      // fill buffer on GPU
+      auto packKernel = gpu::make_kernel( &internal::packBufferGPU );
+      packKernel.addFieldIndexingParam( FieldIdx_T::interval( *d_field, ci, 0, this->fSize_ ) );
+      packKernel.addParam<real_t*>(d_buffer);
+      packKernel();
+
+      // copy from device to host buffer
+      WALBERLA_GPU_CHECK(gpuMemcpy(h_buffer.data(), d_buffer, nValues * sizeof(ValueType), gpuMemcpyDeviceToHost))
+
+      WALBERLA_GPU_CHECK(gpuFree(d_buffer))
+
+   }
+
+   void unpackBuffer(IBlock* const block, const CellInterval& ci, const std::vector< ValueType >& h_buffer) {
+
+      // get field
+      auto d_field = block->getData< GPUField_T >(this->fieldID_);
+      WALBERLA_ASSERT_NOT_NULLPTR(d_field)
+
+      const uint_t nValues = ci.numCells() * uint_c(this->fSize_);
+
+      // create GPU buffer
+      ValueType * d_buffer{};
+      WALBERLA_GPU_CHECK(gpuMalloc(&d_buffer, nValues * sizeof(ValueType)))
+
+      // copy from host to device buffer
+      WALBERLA_GPU_CHECK(gpuMemcpy(d_buffer, h_buffer.data(), nValues * sizeof(ValueType), gpuMemcpyHostToDevice))
+
+      // unpack buffer on GPU
+      auto unpackKernel = gpu::make_kernel( &internal::unpackBufferGPU );
+      unpackKernel.addFieldIndexingParam( FieldIdx_T::interval( *d_field, ci, 0, this->fSize_ ) );
+      unpackKernel.addParam<const real_t*>(d_buffer);
+      unpackKernel();
+
+      WALBERLA_GPU_CHECK(gpuFree(d_buffer))
+   }
+
+}; // class ShiftedPeriodicity
+
+} // namespace gpu
+} // namespace walberla
diff --git a/src/materials/alloys/SS316L/SS316L.py b/src/materials/alloys/SS316L/SS316L.py
index 2ca4198d617609156af31a97669753b2dbb2335a..0283b878066e0bd33adcf4aa1385f7dee84494f4 100644
--- a/src/materials/alloys/SS316L/SS316L.py
+++ b/src/materials/alloys/SS316L/SS316L.py
@@ -1,7 +1,7 @@
 import numpy as np
 import sympy as sp
 from pathlib import Path
-from typing import Union
+from typing import Union, Tuple
 from src.materials.alloys.Alloy import Alloy
 from src.materials.elements import Fe, Cr, Mn, Ni
 from src.materials.models import thermal_diffusivity_by_heat_conductivity, density_by_thermal_expansion, MaterialPropertyWrapper
@@ -72,79 +72,182 @@ def create_SS316L(T: Union[float, sp.Symbol]) -> Alloy:
 
     heat_conductivity_temp_array = celsius_to_kelvin(heat_conductivity_temp_array)
 
+    SS316L.heat_conductivity = interpolate_property(T, heat_conductivity_temp_array, heat_conductivity_array)
+    SS316L.density = interpolate_property(T, density_temp_array, density_array)
+    SS316L.heat_capacity = interpolate_property(T, heat_capacity_temp_array, heat_capacity_array)
+    print("SS316L.heat_conductivity:", SS316L.heat_conductivity, "type:", type(SS316L.heat_conductivity))
+    print("SS316L.density:", SS316L.density, "type:", type(SS316L.density))
+    print("SS316L.heat_capacity:", SS316L.heat_capacity, "type:", type(SS316L.heat_capacity))
+
+    # ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------
+
+    temp_float = 1400.0
+    temp_array = np.array([900.0, 1000.0, 1100.0, 1200.0, 1300.0, 1400.0, 1500.0, 1600.0, 1700.0, 1800.0, 1900.0], dtype=float)
+
+    tec_float = 1.7e-5
+    tec_temperature_array = np.array([973.15, 1073.15, 1173.15, 1273.15, 1373.15, 1395.68, 1415.00, 1435.00, 1455.26, 1500.00, 1550.00])
+    tec_array = np.array([18.0e-6, 19.0e-6, 19.5e-6, 20.0e-6, 21.0e-6, 21.8e-6, 22.0e-6, 22.3e-6, 22.5e-6, 23.5e-6, 24.0e-6])
+    tec = interpolate_property(T, tec_temperature_array, tec_array)
+
+    # Define the corresponding thermal expansion coefficients at these points
+    alpha_solidus = 21.8e-6  # 1/K at solidus
+    alpha_liquidus = 22.5e-6  # 1/K at liquidus
+    alpha_below = 18.0e-6     # 1/K below solidus (approximated starting value)
+    alpha_above = 24.0e-6     # 1/K above liquidus (approximated final value)
+
+    # Fit a cubic polynomial for the thermal expansion coefficient
+    # Define the symbolic variable for temperature
+    Tmp = sp.symbols('Tmp')
+
+    # Define the coefficients for a cubic polynomial: alpha(T) = a*T^3 + b*T^2 + c*T + d
+    a, b, c, d = sp.symbols('a b c d')
+
+    # Create a system of equations based on the known values at solidus, liquidus, and boundary conditions
+    eq1 = sp.Eq(a*SS316L.temperature_solidus**3 + b*SS316L.temperature_solidus**2 + c*SS316L.temperature_solidus + d, alpha_solidus)  # At solidus
+    eq2 = sp.Eq(a*SS316L.temperature_liquidus**3 + b*SS316L.temperature_liquidus**2 + c*SS316L.temperature_liquidus + d, alpha_liquidus)  # At liquidus
+    eq3 = sp.Eq(a*973.15**3 + b*973.15**2 + c*973.15 + d, alpha_below)  # Below solidus (starting point)
+    eq4 = sp.Eq(a*1550**3 + b*1550**2 + c*1550 + d, alpha_above)  # Above liquidus (ending point)
+    # Solve the system for a, b, c, d
+    solution = sp.solve([eq1, eq2, eq3, eq4], (a, b, c, d))
+    # Substitute the solved values into the cubic expression
+    tec_expr = solution[a]*Tmp**3 + solution[b]*Tmp**2 + solution[c]*Tmp + solution[d]
+    print("tec_expr:", tec_expr, "type:", type(tec_expr))
+
+    density_float = 8000.0
+    density_array = density_array
+    # print("density_array:", density_array, "type:", type(density_array))
+    density_temp_array = density_temp_array
+    # print("density_temp_array:", density_temp_array, "type:", type(density_temp_array))
+    density_by_thermal_expansion_float = density_by_thermal_expansion(T, 293.0, 8000.0, tec_float)
+    print("density_by_thermal_expansion_float:", density_by_thermal_expansion_float, "type:", type(density_by_thermal_expansion_float))
+    # density_by_thermal_expansion_array = density_by_thermal_expansion(T, 293.0, 8000.0, tec_array)  # testing density_by_thermal_expansion without array support for tec
+    # print("density_by_thermal_expansion_array:", density_by_thermal_expansion_array, "type:", type(density_by_thermal_expansion_array))  # Does not return a MaterialProperty when the input is an array
+    density_by_thermal_expansion_expr = density_by_thermal_expansion(T, 293.0, 8000.0, tec)
+    print("density_by_thermal_expansion_expr:", density_by_thermal_expansion_expr, "type:", type(density_by_thermal_expansion_expr))
+
+    tec_interpolate_float = interpolate_property(T, tec_temperature_array, tec_array)
+    print("tec_interpolate_float:", tec_interpolate_float, "type:", type(tec_interpolate_float))
+
+    heat_capacity_interpolate_1 = interpolate_property(T, heat_capacity_temp_array, heat_capacity_array)
+    print("heat_capacity_interpolate_1:", heat_capacity_interpolate_1, "type:", type(heat_capacity_interpolate_1))
+
+    # Example values (all floats)
+    float_value = 300.15
+    k_arr = np.array([15.1, 15.0, 14.9, 14.8, 14.7, 14.5, 14.3, 14.2, 14.0, 13.8, 13.7], dtype=float)  # W/m·K
+    rho_arr = np.array([7900.0, 7850.0, 7800.0, 7750.0, 7700.0, 7650.0, 7600.0, 7550.0, 7500.0, 7450.0, 7400.0], dtype=float)  # kg/m³
+    c_p_arr = np.array([500.0, 505.0, 510.0, 515.0, 520.0, 525.0, 530.0, 535.0, 540.0, 545.0, 550.0], dtype=float)  # J/kg·K
+    print(np.size(k_arr), np.size(rho_arr), np.size(c_p_arr))
+
+    diffusivity_1 = thermal_diffusivity_by_heat_conductivity(float_value, float_value, float_value)
+    print("diffusivity_1:", diffusivity_1, "type:", type(diffusivity_1))
+
+    # diffusivity_2 = thermal_diffusivity_by_heat_conductivity(k_arr, rho_arr, c_p_arr)
+    # print("diffusivity_2:", diffusivity_2, "type:", type(diffusivity_2))
+
+    # Thermal Conductivity (W/m·K)
+    k_expr = sp.Piecewise(
+        (15.1 - (T - 900) * (0.4 / (1000 - 900)), T < 1000),
+        (15.0 - (T - 1000) * (0.4 / (1100 - 1000)), (T >= 1000) & (T < 1100)),
+        (14.9 - (T - 1100) * (0.4 / (1200 - 1100)), (T >= 1100) & (T < 1200)),
+        (14.8 - (T - 1200) * (0.4 / (1300 - 1200)), (T >= 1200) & (T < 1300)),
+        (14.7 - (T - 1300) * (0.4 / (1400 - 1300)), (T >= 1300) & (T < 1400)),
+        (14.5 - (T - 1400) * (0.3 / (1500 - 1400)), (T >= 1400) & (T < 1500)),
+        (14.3 - (T - 1500) * (0.3 / (1600 - 1500)), (T >= 1500) & (T < 1600)),
+        (14.2 - (T - 1600) * (0.2 / (1700 - 1600)), (T >= 1600) & (T < 1700)),
+        (14.0 - (T - 1700) * (0.2 / (1800 - 1700)), (T >= 1700) & (T < 1800)),
+        (13.8 - (T - 1800) * (0.1 / (1900 - 1800)), (T >= 1800) & (T < 1900)),
+        (13.7, True)  # for T >= 1900
+    )
+
+    # Density (kg/m³)
+    rho_expr = sp.Piecewise(
+        (7900.0 - (T - 900) * (50.0 / (1000 - 900)), T < 1000),
+        (7850.0 - (T - 1000) * (50.0 / (1100 - 1000)), (T >= 1000) & (T < 1100)),
+        (7800.0 - (T - 1100) * (50.0 / (1200 - 1100)), (T >= 1100) & (T < 1200)),
+        (7750.0 - (T - 1200) * (50.0 / (1300 - 1200)), (T >= 1200) & (T < 1300)),
+        (7700.0 - (T - 1300) * (50.0 / (1400 - 1300)), (T >= 1300) & (T < 1400)),
+        (7650.0 - (T - 1400) * (50.0 / (1500 - 1400)), (T >= 1400) & (T < 1500)),
+        (7600.0 - (T - 1500) * (50.0 / (1600 - 1500)), (T >= 1500) & (T < 1600)),
+        (7550.0 - (T - 1600) * (50.0 / (1700 - 1600)), (T >= 1600) & (T < 1700)),
+        (7500.0 - (T - 1700) * (50.0 / (1800 - 1700)), (T >= 1700) & (T < 1800)),
+        (7450.0 - (T - 1800) * (50.0 / (1900 - 1800)), (T >= 1800) & (T < 1900)),
+        (7400.0, True)  # for T >= 1900
+    )
+
+    # Specific Heat Capacity (J/kg·K)
+    c_p_expr = sp.Piecewise(
+        (500.0 + (T - 900) * (5.0 / (1000 - 900)), T < 1000),
+        (505.0 + (T - 1000) * (5.0 / (1100 - 1000)), (T >= 1000) & (T < 1100)),
+        (510.0 + (T - 1100) * (5.0 / (1200 - 1100)), (T >= 1100) & (T < 1200)),
+        (515.0 + (T - 1200) * (5.0 / (1300 - 1200)), (T >= 1200) & (T < 1300)),
+        (520.0 + (T - 1300) * (5.0 / (1400 - 1300)), (T >= 1300) & (T < 1400)),
+        (525.0 + (T - 1400) * (5.0 / (1500 - 1400)), (T >= 1400) & (T < 1500)),
+        (530.0 + (T - 1500) * (5.0 / (1600 - 1500)), (T >= 1500) & (T < 1600)),
+        (535.0 + (T - 1600) * (5.0 / (1700 - 1600)), (T >= 1600) & (T < 1700)),
+        (540.0 + (T - 1700) * (5.0 / (1800 - 1700)), (T >= 1700) & (T < 1800)),
+        (545.0 + (T - 1800) * (5.0 / (1900 - 1800)), (T >= 1800) & (T < 1900)),
+        (550.0, True)  # for T >= 1900
+    )
+
+    print("helloo")
+    diffusivity_3 = thermal_diffusivity_by_heat_conductivity(k_expr, rho_expr, c_p_expr)
+    print("diffusivity_3:", diffusivity_3, "type:", type(diffusivity_3))
+
+    '''k_inputs = [k_arr, k_expr, float_value]
+    rho_inputs = [rho_arr, rho_expr, float_value]
+    c_p_inputs = [c_p_arr, c_p_expr, float_value]
+
+    # Test all combinations of inputs using three nested loops
+    for i, k in enumerate(k_inputs):
+        for j, rho in enumerate(rho_inputs):
+            for k, c_p in enumerate(c_p_inputs):
+                try:
+                    diffusivity = thermal_diffusivity_by_heat_conductivity(k, rho, c_p)
+                    print(f"Combination k[{i+1}], rho[{j+1}], c_p[{k+1}]: diffusivity = {diffusivity}, type = {type(diffusivity)}")
+                except Exception as e:
+                    print(f"Combination k[{i+1}], rho[{j+1}], c_p[{k+1}]: Error - {e}") '''
+
+    diffusivity_4 = interpolate_property(T, SS316L.solidification_interval(), (4.81e-6, 4.66e-6))
+    print("diffusivity_4:", diffusivity_4, "type:", type(diffusivity_4))
+
+    # diffusivity_5 = interpolate_property(T, temp_array, density_by_thermal_expansion_array)
+    # print("diffusivity_5:", diffusivity_5, "type:", type(diffusivity_5))
+
+    density_1 = density_by_thermal_expansion(SS316L.solidification_interval(), 293.0, 8000.0, 12.0)
+    print("density_1:", density_1)
+
+    # SS316L.thermal_diffusivity = diffusivity_1
+
     # Perform interpolation for each property
     print('density')
     # SS316L.density = interpolate_property(T, density_temp_array, density_array)
-    SS316L.density = MaterialPropertyWrapper(density_by_thermal_expansion(
-        T, Constants.temperature_room, 7940.7476, SS316L.thermal_expansion_coefficient))
     print('SS316L.density: ', SS316L.density)
     print('heat_capacity')
-    SS316L.heat_capacity = interpolate_property(T, heat_capacity_temp_array, heat_capacity_array)
+    # SS316L.heat_capacity = interpolate_property(T, heat_capacity_temp_array, heat_capacity_array)
     print('heat_conductivity')
-    SS316L.heat_conductivity = interpolate_property(T, heat_conductivity_temp_array, heat_conductivity_array)
+    # SS316L.heat_conductivity = interpolate_property(T, heat_conductivity_temp_array, heat_conductivity_array)
 
     # Calculate thermal diffusivity from thermal conductivity, density, and heat capacity
     print('thermal_diffusivity')
-
-    if isinstance(T, sp.Symbol):
-        thermal_diffusivity_array = thermal_diffusivity_by_heat_conductivity(
-            SS316L.heat_conductivity.evalf(T, density_temp_array),
-            SS316L.density.evalf(T, heat_conductivity_temp_array),
-            # SS316L.density.expr,
-            SS316L.heat_capacity.evalf(T, heat_conductivity_temp_array)
-        )
-        print('SS316L.heat_conductivity.evalf(T, density_temp_array): ', (SS316L.heat_conductivity.evalf(T, density_temp_array)))  # <class 'numpy.ndarray'>
-        print('SS316L.density.expr: ', type(SS316L.density.expr))  # <class 'sympy.core.mul.Mul'>
-        print('SS316L.heat_capacity.evalf(T, heat_conductivity_temp_array): ', (SS316L.heat_capacity.evalf(T, heat_conductivity_temp_array)))  # <class 'numpy.ndarray'>
-        print('thermal_diffusivity_array: ', type(thermal_diffusivity_array))  # <class 'sympy.tensor.array.dense_ndim_array.ImmutableDenseNDimArray'>
-
-        # Calculate thermal diffusivity values for each temperature in the array
-        thermal_diffusivity_array1 = np.array([
-            (thermal_diffusivity_by_heat_conductivity(
-                (SS316L.heat_conductivity.evalf(T, temp)),
-                # SS316L.density.evalf(T, temp),
-                SS316L.density.expr,
-                (SS316L.heat_capacity.evalf(T, temp))
-            ))
-            for temp in density_temp_array
-        ])
-
-        print('thermal_diffusivity_array1 type:', type(thermal_diffusivity_array1))
-        print('thermal_diffusivity_array1 dtype:', thermal_diffusivity_array1.dtype)
-        print('thermal_diffusivity_array1 shape:', thermal_diffusivity_array1.shape)
-        print('First few values of thermal_diffusivity_array1:', thermal_diffusivity_array1[:5])
-
-        SS316L.thermal_diffusivity = interpolate_property(T, density_temp_array, thermal_diffusivity_array)
-
-    elif isinstance(T, float):
-    # elif isinstance(T, sp.Symbol):
-        # Calculate thermal diffusivity from thermal conductivity, density, and heat capacity
-        thermal_diffusivity_array2 = []
-        for temp in heat_conductivity_temp_array:
-            k = SS316L.heat_conductivity.evalf(T, temp)
-            rho = SS316L.density.evalf(T, temp)
-            cp = SS316L.heat_capacity.evalf(T, temp)
-            thermal_diffusivity2 = thermal_diffusivity_by_heat_conductivity(k, rho, cp)
-            thermal_diffusivity_array2.append(thermal_diffusivity2)
-
-        # Interpolate thermal diffusivity using the temperature array and calculated values
-        SS316L.thermal_diffusivity = interpolate_property(T, density_temp_array, thermal_diffusivity_array2)
-
-
-    '''SS316L.thermal_diffusivity = interpolate_property(T, density_temp_array,
-                                                      thermal_diffusivity_by_heat_conductivity(
-                                                          SS316L.heat_conductivity.evalf(T, density_temp_array),
-                                                          # SS316L.density.evalf(T, heat_conductivity_temp_array),
-                                                          SS316L.density.expr,
-                                                          SS316L.heat_capacity.evalf(T, heat_conductivity_temp_array)
-                                                      ))'''
+    # print("SS316L.heat_conductivity.evalf(T, density_temp_array):", SS316L.heat_conductivity.evalf(T, density_temp_array))
+
+    SS316L.thermal_diffusivity = thermal_diffusivity_by_heat_conductivity(SS316L.heat_conductivity, SS316L.density, SS316L.heat_capacity)
+        # SS316L.heat_conductivity.expr,
+        # SS316L.density.expr,
+        # SS316L.heat_capacity.expr
+    # )
+    # print('SS316L.heat_conductivity.evalf(T, density_temp_array): ', (SS316L.heat_conductivity.evalf(T, density_temp_array)))  # <class 'numpy.ndarray'>
+    # print('SS316L.density.expr: ', type(SS316L.density.expr))  # <class 'sympy.core.mul.Mul'>
+    # print('SS316L.heat_capacity.evalf(T, heat_conductivity_temp_array): ', (SS316L.heat_capacity.evalf(T, heat_conductivity_temp_array)))  # <class 'numpy.ndarray'>
+    print('SS316L.thermal_diffusivity: ', SS316L.thermal_diffusivity)
+    print("----------" * 10)
 
     return SS316L
 
 
 if __name__ == '__main__':
-    Temp = sp.Symbol('Temp')
-    alloy = create_SS316L(300.15)
+    Temp = sp.Symbol('T')
+    alloy = create_SS316L(Temp)
 
     # Print the composition of each element in the alloy
     for i in range(len(alloy.composition)):
@@ -169,9 +272,9 @@ if __name__ == '__main__':
     print(f"Interpolated heat conductivity at T={test_temp} using evalf: {heat_conductivity_result}")
 
     # Test thermal diffusivity calculation
-    heat_conductivity = 500  # Example value for testing
-    density = 5000  # Example value for testing
-    heat_capacity = 600  # Example value for testing
+    heat_conductivity = 500.  # Example value for testing
+    density = 5000.  # Example value for testing
+    heat_capacity = 600.  # Example value for testing
     thermal_diffusivity = thermal_diffusivity_by_heat_conductivity(
         heat_conductivity, density, heat_capacity)
     print(f"Calculated thermal diffusivity: {thermal_diffusivity}")
diff --git a/src/materials/alloys/Ti6Al4V.py b/src/materials/alloys/Ti6Al4V.py
index 49b6d8ec09b9fbd1fd649b7fdaf056c0ee9e3e22..e033e74550b8d73d0a67847a039e85d148a8b043 100644
--- a/src/materials/alloys/Ti6Al4V.py
+++ b/src/materials/alloys/Ti6Al4V.py
@@ -4,42 +4,12 @@ from src.materials.alloys.Alloy import Alloy
 from src.materials.constants import Constants
 from src.materials.elements import Ti, Al, V
 from src.materials.interpolators import interpolate_equidistant, interpolate_lookup, interpolate_property
-from src.materials.models import density_by_thermal_expansion, thermal_diffusivity_by_heat_conductivity, MaterialPropertyWrapper
-from src.materials.typedefs import MaterialProperty
+from src.materials.models import density_by_thermal_expansion, thermal_diffusivity_by_heat_conductivity
 
 
 def create_Ti6Al4V(T: Union[float, sp.Symbol]) -> Alloy:
     """
     Creates an Alloy instance for Ti6Al4V with specific properties.
-
-    Args:
-        T (Union[float, sp.Symbol]): Temperature as a symbolic variable or numeric value.
-
-    Returns:
-        Alloy: Initialized Ti6Al4V alloy with physical properties including density, heat conductivity, and thermal diffusivity.
-
-    Notes:
-        - **Data Units**: All input data should be in SI units. If data is not in SI units, it must be converted. For example:
-            - **Temperature**: The temperature used in the function is expected to be in Kelvin. If you have temperature data in Celsius, convert it to Kelvin before using it.
-            - **Density**: Should be in kg/m³.
-            - **Heat Capacity**: Should be in J/(kg·K).
-            - **Heat Conductivity**: Should be in W/(m·K).
-        - **Temperature Array Consistency**: Ensure that all temperature arrays used for interpolation (`density_temp_array`, `heat_capacity_temp_array`, `heat_conductivity_temp_array`) have the same length to ensure consistency in property evaluation.
-          The current implementation uses the temperature range from the `solidification_interval` method for all interpolations to maintain consistency.
-        - **Solidus and Liquidus**: The solidus and liquidus temperatures define the temperature range over which the alloy is solid and liquid, respectively. These temperatures are used to determine the range for property interpolation.
-        - **Thermal Diffusivity Calculation**: The thermal diffusivity is computed using the formula:
-
-          Thermal Diffusivity = Heat Conductivity / Density * Heat Capacity
-
-          where:
-            - **Heat Conductivity**: Interpolated value based on temperature.
-            - **Density**: Computed based on thermal expansion.
-            - **Heat Capacity**: Provided directly.
-
-    Example:
-        If you have temperature data in Celsius and property data in non-SI units, convert the temperature to Kelvin and property values to SI units before using them.
-        For Ti6Al4V, the alloy properties are computed based on the provided temperature and constants, ensuring accurate representation of the material's behavior.
-
     """
     # Define the Ti6Al4V alloy with its elemental composition and phase transition temperatures
     Ti6Al4V = Alloy(
@@ -48,34 +18,35 @@ def create_Ti6Al4V(T: Union[float, sp.Symbol]) -> Alloy:
         temperature_solidus=1878.0,             # Temperature solidus in Kelvin
         temperature_liquidus=1928.0,            # Temperature liquidus in Kelvin
         thermal_expansion_coefficient=8.6e-6,   # 1/K  [Source: MatWeb]
-        heat_capacity=526,                      # J/(kg·K) [Source: MatWeb]
+        heat_capacity=526.0,                      # J/(kg·K) [Source: MatWeb]
         latent_heat_of_fusion=290000.0,         # J/kg [Source: MatWeb]
         latent_heat_of_vaporization=8.86e6      # J/kg [Source: MatWeb]
     )
 
     # Compute density based on thermal expansion and other constants
-    Ti6Al4V.density = MaterialPropertyWrapper(density_by_thermal_expansion(
-        T, Constants.temperature_room, 4430, Ti6Al4V.thermal_expansion_coefficient))
+    Ti6Al4V.density = density_by_thermal_expansion(
+        T, Constants.temperature_room, 4430, Ti6Al4V.thermal_expansion_coefficient)
+    print("Ti6Al4V.density:", Ti6Al4V.density)
 
     # Interpolate heat conductivity based on temperature
     Ti6Al4V.heat_conductivity = interpolate_property(
-        T, Ti6Al4V.solidification_interval(), [6.7, 32.0])  # W/(m·K)
-
-    # Calculate thermal diffusivity using conductivity, density, and heat capacity
-    Ti6Al4V.thermal_diffusivity = interpolate_property(T, Ti6Al4V.solidification_interval(),
-                                                       thermal_diffusivity_by_heat_conductivity(
-                                                           Ti6Al4V.heat_conductivity.evalf(T, Ti6Al4V.solidification_interval()),
-                                                           Ti6Al4V.density.expr,
-                                                           # Ti6Al4V.density.evalf(T, Ti6Al4V.solidification_interval()),
-                                                           Ti6Al4V.heat_capacity
-                                                       ))
+        T, Ti6Al4V.solidification_interval(), (6.7, 32.0))  # W/(m·K)
+    print("Ti6Al4V.heat_conductivity:", Ti6Al4V.heat_conductivity)
+    # print("Ti6Al4V.heat_conductivity.evalf(T, Ti6Al4V.solidification_interval()):", Ti6Al4V.heat_conductivity.evalf(T, Ti6Al4V.solidification_interval()))
+    print("Ti6Al4V.heat_capacity:", Ti6Al4V.heat_capacity)
+
+    k_evalf = Ti6Al4V.heat_conductivity.evalf(T, Ti6Al4V.solidification_interval())
+    print("k_evalf:", k_evalf, "type:", type(k_evalf))
+    rho_evalf = Ti6Al4V.density.evalf(T, Ti6Al4V.solidification_interval())
+    print("rho_evalf:", rho_evalf, "type:", type(rho_evalf))
+    # c_p_evalf = Ti6Al4V.heat_capacity.evalf(T, Ti6Al4V.solidification_interval())  # AttributeError: 'float' object has no attribute 'evalf'
+    # print("c_p_evalf:", c_p_evalf, "type:", type(c_p_evalf))
+
+    Ti6Al4V.thermal_diffusivity = thermal_diffusivity_by_heat_conductivity(Ti6Al4V.heat_conductivity, Ti6Al4V.density, Ti6Al4V.heat_capacity)
+    # Ti6Al4V.thermal_diffusivity = interpolate_property(T, Ti6Al4V.solidification_interval(), k_evalf)  # only works with alloy = create_Ti6Al4V(Temp)
+    print("Ti6Al4V.thermal_diffusivity:", Ti6Al4V.thermal_diffusivity, "type:", type(Ti6Al4V.thermal_diffusivity))
+    print("----------" * 10)
 
-    print('thermal_diffusivity_by_heat_conductivity: ', thermal_diffusivity_by_heat_conductivity(
-        Ti6Al4V.heat_conductivity.evalf(T, Ti6Al4V.solidification_interval()),
-        Ti6Al4V.density.expr,
-        # Ti6Al4V.density.evalf(T, Ti6Al4V.solidification_interval()),
-        Ti6Al4V.heat_capacity
-    ))
     return Ti6Al4V
 
 
@@ -97,7 +68,7 @@ if __name__ == '__main__':
     print("\nTesting interpolate_equidistant:")
     test_temp = 1850.0  # Example temperature value
     result = interpolate_equidistant(
-        Temp, 1820, 20.0, [700, 800, 900, 1000, 1100, 1200])
+        Temp, 1820.0, 20.0, [700., 800., 900., 1000., 1100., 1200.])
     print(f"Interpolated heat capacity at T={test_temp} using interpolate_equidistant: {result.expr}")
     print(f"Assignments: {result.assignments}")
 
@@ -114,9 +85,9 @@ if __name__ == '__main__':
     print(f"Interpolated value using interpolate_lookup: {result_lookup}")
 
     # Test thermal diffusivity calculation
-    heat_conductivity = 500  # Example value for testing
-    density = 5000  # Example value for testing
-    heat_capacity = 600  # Example value for testing
+    heat_conductivity = 500.  # Example value for testing
+    density = 5000.  # Example value for testing
+    heat_capacity = 600.  # Example value for testing
     thermal_diffusivity = thermal_diffusivity_by_heat_conductivity(
         heat_conductivity, density, heat_capacity)
     print(f"Calculated thermal diffusivity: {thermal_diffusivity}")
diff --git a/src/materials/assignment_converter.py b/src/materials/assignment_converter.py
index e397304cad5ad714bbc7799fbae475013e7223fb..690cd4f83b3d2569455fdd0a9afce02b1802ff2e 100644
--- a/src/materials/assignment_converter.py
+++ b/src/materials/assignment_converter.py
@@ -3,6 +3,7 @@ import sympy as sp
 import pystencils as ps
 from typing import List, Dict, Tuple, Union
 from pystencils.types.quick import Arr, Fp
+from pystencils.types import create_type
 from pystencils.sympyextensions.typed_sympy import CastFunc
 from src.materials.typedefs import Assignment, ArrayTypes
 
@@ -27,9 +28,9 @@ def type_mapping(type_str: str, length: int) -> Union[np.dtype, Arr]:
         dtype('float64')
     """
     if type_str == "double[]":
-        return Arr(Fp(64, const=True), length=length)  # 64-bit floating point array
+        return Arr(Fp(64, const=True), length)  # 64-bit floating point array
     elif type_str == "float[]":
-        return Arr(Fp(32, const=True), length=length)  # 32-bit floating point array
+        return Arr(Fp(32, const=True), length)  # 32-bit floating point array
     elif type_str == "double":
         return np.dtype('float64')
     elif type_str == "float":
diff --git a/src/materials/interpolators.py b/src/materials/interpolators.py
index 9cb496430790a7582c34b62d05077504da682cbd..6f86c35d9c58da6302343fce8cd9ebe699faaaa7 100644
--- a/src/materials/interpolators.py
+++ b/src/materials/interpolators.py
@@ -11,7 +11,6 @@ def interpolate_equidistant(
         T: Union[float, sp.Symbol],
         T_base: float,
         T_incr: float,
-        # v_array: ArrayTypes) -> Union[float, MaterialProperty]:  # PropertyTypes
         v_array: ArrayTypes) -> MaterialProperty:
     """
     Perform equidistant interpolation for symbolic or numeric temperature values.
@@ -20,8 +19,7 @@ def interpolate_equidistant(
     :param T_base: Base temperature for the interpolation.
     :param T_incr: Increment in temperature for each step in the interpolation array.
     :param v_array: Array of values corresponding to the temperatures.
-    :return: Interpolated value as a float or MaterialProperty object.
-    :raises ValueError: If the temperature type is unsupported.
+    :return: Interpolated value as a MaterialProperty object.
     """
     if T_incr < 0:
         T_incr *= -1
@@ -48,27 +46,21 @@ def interpolate_equidistant(
             (Wrapper(v_array[-1]), T >= T_base + (len(v_array) - 1) * T_incr),
             ((1 - sym_dec) * sym_data[sym_idx] + sym_dec * sym_data[sym_idx + 1], True)
         )
-        print('sym_data, type(sym_data): ', sym_data, type(sym_data))
         sub_expressions = [
-            Assignment(sym_data, tuple(Wrapper(v) for v in v_array), "double[]"),
+            Assignment(sym_data, tuple(Wrapper(v_array)), "double[]"),
             Assignment(sym_idx, pos, "int"),
             Assignment(sym_dec, pos_dec, "double")
         ]
         return MaterialProperty(result, sub_expressions)
 
     elif isinstance(T, float):
-        v_array = np.asarray(v_array)
         n = len(v_array)
 
         min_temp = T_base
         max_temp = T_base + (n - 1) * T_incr
         if T < min_temp:
-            # return float(v_array[0])
-            # return MaterialProperty(sp.Float(float(v_array[0])), [])
             return MaterialPropertyWrapper(v_array[0])
         elif T > max_temp:
-            # return float(v_array[-1])
-            # return MaterialProperty(sp.Float(float(v_array[-1])), [])
             return MaterialPropertyWrapper(v_array[-1])
 
         pos = (T - T_base) / T_incr
@@ -76,8 +68,6 @@ def interpolate_equidistant(
         pos_dec = pos - pos_int
 
         value = (1 - pos_dec) * v_array[pos_int] + pos_dec * v_array[pos_int + 1]
-        # return float(value)
-        # return MaterialProperty(sp.Float(float(value)), [])
         return MaterialPropertyWrapper(value)
 
     else:
@@ -87,7 +77,6 @@ def interpolate_equidistant(
 def interpolate_lookup(
         T: Union[float, sp.Symbol],
         T_array: ArrayTypes,
-        # v_array: ArrayTypes) -> Union[float, MaterialProperty]:  # PropertyTypes
         v_array: ArrayTypes) -> MaterialProperty:
     """
     Perform lookup-based interpolation for symbolic or numeric temperature values.
@@ -106,8 +95,14 @@ def interpolate_lookup(
         v_array = np.flip(v_array)
 
     if isinstance(T, sp.Symbol):
-        T_array = [Wrapper(t) for t in T_array]
-        v_array = [Wrapper(v) for v in v_array]
+        global COUNT
+        label = '_' + str(COUNT).zfill(3)
+        COUNT += 1
+
+        sym_expr = sp.Symbol(label + "_expr")
+
+        T_array = Wrapper(T_array)
+        v_array = Wrapper(v_array)
 
         conditions = [
             (v_array[0], T < T_array[0]),
@@ -117,24 +112,20 @@ def interpolate_lookup(
                     v_array[i] + (v_array[i + 1] - v_array[i]) / (T_array[i + 1] - T_array[i]) * (T - T_array[i]))
             conditions.append(
                 (interp_expr, sp.And(T >= T_array[i], T < T_array[i + 1]) if i + 1 < len(T_array) - 1 else True))
-        return MaterialProperty(sp.Piecewise(*conditions))
+        sub_expressions = [
+            Assignment(sym_expr, sp.Piecewise(*conditions), "double"),
+        ]
+        # return MaterialProperty(sp.Piecewise(*conditions))
+        return MaterialProperty(sym_expr, sub_expressions)
 
     elif isinstance(T, float):
-        T_array = np.asarray(T_array)
-        v_array = np.asarray(v_array)
         if T < T_array[0]:
-            # return float(v_array[0])
-            #nreturn MaterialProperty(sp.Float(float(v_array[0])), [])
             return MaterialPropertyWrapper(v_array[0])
         elif T > T_array[-1]:
-            # return float(v_array[-1])
-            # return MaterialProperty(sp.Float(float(v_array[-1])), [])
             return MaterialPropertyWrapper(v_array[-1])
         else:
-            # return float(np.interp(T, T_array, v_array))
-            # return MaterialProperty(sp.Float(float(np.interp(T, T_array, v_array))), [])
             v = np.interp(T, T_array, v_array)
-            return MaterialPropertyWrapper(v)
+            return MaterialPropertyWrapper(float(v))
     else:
         raise ValueError(f"Invalid input for T: {type(T)}")
 
@@ -157,7 +148,6 @@ def interpolate_property(
         temp_array: ArrayTypes,
         prop_array: ArrayTypes,
         temp_array_limit: int = 6,
-        # force_lookup=False) -> Union[float, MaterialProperty]:  # PropertyTypes
         force_lookup: bool = False) -> MaterialProperty:
     """
     Perform interpolation based on the type of temperature array (equidistant or not).
@@ -172,10 +162,10 @@ def interpolate_property(
     """
     # Ensure that temp_array and prop_array are arrays
     if not isinstance(temp_array, (Tuple, List, np.ndarray)):
-        raise TypeError(f"Expected temp_array to be a list or array, got {type(temp_array)}")
+        raise TypeError(f"Expected temp_array to be a list or array or tuple, got {type(temp_array)}")
 
     if not isinstance(prop_array, (Tuple, List, np.ndarray)):
-        raise TypeError(f"Expected prop_array to be a list or array, got {type(prop_array)}")
+        raise TypeError(f"Expected prop_array to be a list or array or tuple, got {type(prop_array)}")
 
     if len(temp_array) != len(prop_array):
         raise ValueError("T_array and v_array must have the same length")
@@ -198,8 +188,8 @@ def interpolate_property(
 if __name__ == '__main__':
     # Example usage and consistency tests for the interpolation functions
     Temp = sp.Symbol('Temp')
-    density_temp_array1 = np.flip(np.array([600.0, 500.0, 400.0, 300.0, 200.0]))
-    density_v_array1 = np.flip(np.array([10.0, 20.0, 30.0, 40.0, 50.0]))
+    density_temp_array1 = np.flip(np.asarray([600.0, 500.0, 400.0, 300.0, 200.0]))
+    density_v_array1 = np.flip(np.asarray([10.0, 20.0, 30.0, 40.0, 50.0]))
     density_temp_array2 = [1878.0, 1884.2, 1894.7, 1905.3, 1915.8, 1926.3, 1928.0]
     density_v_array2 = [4236.3, 4234.9, 4233.6, 4232.3, 4230.9, 4229.7, 4227.0]
     Temp_base = 200.0
@@ -231,8 +221,8 @@ if __name__ == '__main__':
     print("\nConsistency Test between interpolate_lookup and interpolate_equidistant:")
     test_temps = [150.0, 200.0, 350.0, 450.0, 550.0, 650.0]
     for TT in test_temps:
-        lookup_result = interpolate_lookup(TT, np.flip(np.array([600.0, 500.0, 400.0, 300.0, 200.0])), np.array([10.0, 20.0, 30.0, 40.0, 50.0]))
-        equidistant_result = interpolate_equidistant(TT, 200.0, 100.0, np.array([10.0, 20.0, 30.0, 40.0, 50.0]))
+        lookup_result = interpolate_lookup(TT, np.flip(np.asarray([600.0, 500.0, 400.0, 300.0, 200.0])), np.asarray([10.0, 20.0, 30.0, 40.0, 50.0]))
+        equidistant_result = interpolate_equidistant(TT, 200.0, 100.0, np.asarray([10.0, 20.0, 30.0, 40.0, 50.0]))
         print(f"Temperature {TT}:")
         print(f"interpolate_lookup result: {lookup_result.expr}")
         print(f"interpolate_equidistant result: {equidistant_result.expr}")
diff --git a/src/materials/models.py b/src/materials/models.py
index 56014a9752a79bf0c0a7c34d461c75808d2fb88d..1c13b1c6880192526afe3e7c7e09d8fbdf1f2dff 100644
--- a/src/materials/models.py
+++ b/src/materials/models.py
@@ -1,134 +1,127 @@
 import numpy as np
 import sympy as sp
 from typing import Union, List
-from src.materials.typedefs import MaterialProperty
+from src.materials.typedefs import MaterialProperty, ArrayTypes
 
 
-def Wrapper(value: Union[sp.Expr, int, float, np.ndarray]) -> Union[sp.Expr, List[sp.Expr]]:
+def Wrapper(value: Union[sp.Expr, float, np.float32, np.float64, ArrayTypes]) -> Union[sp.Expr, List[sp.Expr]]:
     if isinstance(value, sp.Expr):
-        return sp.sympify(value)
-    if isinstance(value, (int, float)):
+        return sp.simplify(value)
+    if isinstance(value, (float, np.float32, np.float64)):
         return sp.Float(float(value))
-    if isinstance(value, np.ndarray):
+    if isinstance(value, ArrayTypes):  # Handles lists, tuples, or arrays
         return [sp.Float(float(v)) for v in value]
-        # return [Wrapper(v) for v in value]
-    raise ValueError("Unsupported type for value in Wrapper")
+    raise ValueError(f"Unsupported type for value in Wrapper: {type(value)}")
 
 
-'''def MaterialPropertyWrapper(value: Union[sp.Expr, int, float, np.ndarray]) -> MaterialProperty:
-    wrapped_value = Wrapper(value)
-    if isinstance(wrapped_value, list):
-        # Instead of returning a list of MaterialProperties, wrap the entire array
-        return MaterialProperty(np.array(wrapped_value))
-    return MaterialProperty(wrapped_value)'''
-
-
-def MaterialPropertyWrapper(value: Union[sp.Expr, int, float, np.ndarray]) -> MaterialProperty:
+def MaterialPropertyWrapper(value: Union[sp.Expr, float, np.float32, np.float64, ArrayTypes]) -> MaterialProperty:
     wrapped_value = Wrapper(value)
     return MaterialProperty(wrapped_value)
 
 
 def density_by_thermal_expansion(
-        temperature: Union[float, np.ndarray, sp.Expr],
+        temperature: Union[float, ArrayTypes, sp.Expr],  # changed temperature from np.ndarray to ArrayType
         temperature_base: float,
         density_base: float,
-        thermal_expansion_coefficient: Union[float, np.ndarray, sp.Expr]) \
-        -> Union[float, np.ndarray, sp.Expr]:
+        thermal_expansion_coefficient: Union[float, MaterialProperty]) \
+        -> MaterialProperty:  # removed np.ndarray for tec, added MaterialProperty for tec, since thermal_expansion_coefficient: PropertyTypes
+        # the return type is no longer Union[float, np.ndarray, sp.Expr] but just a MaterialProperty
+    from src.materials.interpolators import interpolate_property
     """
     Calculate density based on thermal expansion using the formula:
     rho(T) = rho_0 / (1 + tec * (T - T_0))^3
+    """
+    if isinstance(temperature, ArrayTypes):
+        temperature = np.asarray(temperature)
 
-    Where:
-        - rho(T) is the density at temperature T.
-        - rho_0 is the density at the base temperature T_0.
-        - tec is the thermal expansion coefficient.
+    if isinstance(temperature, ArrayTypes) and isinstance(thermal_expansion_coefficient, MaterialProperty):
+        raise TypeError(f"Incompatible combination of temperature (type:{type(temperature)}) and thermal expansion coefficient ({type(thermal_expansion_coefficient)})")
 
-    :param temperature: Temperature value. Can be a numeric value (float), numpy array (np.ndarray), or a symbolic expression (sp.Expr).
-    :param temperature_base: Reference temperature at which the base density is given, as a float.
-    :param density_base: Density at the reference temperature, as a float.
-    :param thermal_expansion_coefficient: Thermal expansion coefficient. Can be a numeric value (float), numpy array (np.ndarray), or a symbolic expression (sp.Expr).
-    :return: Density at temperature T. Returns a float if all inputs are numeric; otherwise, returns a numpy array or a sympy expression.
-    """
-    # Input validation
-    for param, name in zip([temperature, temperature_base, density_base, thermal_expansion_coefficient], ['temperature', 'temperature_base', 'density_base', 'thermal_expansion_coefficient']):
-        if isinstance(param, (float, int)) and param <= 0:
-            raise ValueError(f"{name.capitalize()} must be positive.")
-        if isinstance(param, np.ndarray) and (param <= 0).any():
-            raise ValueError(f"{name.capitalize()} array contains non-positive values.")
+    if isinstance(thermal_expansion_coefficient, float):
+        density = density_base * (1 + thermal_expansion_coefficient * (temperature - temperature_base)) ** (-3)
+        if isinstance(density, np.ndarray):
+            T_dbte = sp.Symbol('T_dbte')
+            density = interpolate_property(T_dbte, temperature, density)
+        else:
+            isinstance(density, (float, sp.Expr))
+            density = MaterialPropertyWrapper(density)
+        return density
 
-    if isinstance(temperature, sp.Symbol) and isinstance(thermal_expansion_coefficient, np.ndarray):
-        raise ValueError("This combination is not valid. Provide the temperature data which is used for the thermal_expansion_coefficient and create a new interpolation material property.")
-    result = density_base * (1 + thermal_expansion_coefficient * (temperature - temperature_base)) ** (-3)
-    return result
+    else:  # isinstance(thermal_expansion_coefficient, MaterialProperty)
+        tec_expr = thermal_expansion_coefficient.expr  # extract the expr (sp.Expr) for tec
+        density = density_base * (1 + tec_expr * (temperature - temperature_base)) ** (-3)
+        return MaterialPropertyWrapper(density)
 
 
 def thermal_diffusivity_by_heat_conductivity(
-        heat_conductivity: Union[float, np.ndarray, sp.Expr],
-        density: Union[float, np.ndarray, sp.Expr],
-        heat_capacity: Union[float, np.ndarray, sp.Expr]) \
-        -> Union[float, np.ndarray, sp.Expr]:
+        heat_conductivity: Union[float, MaterialProperty],
+        density: Union[float, MaterialProperty],
+        heat_capacity: Union[float, MaterialProperty]) \
+        -> MaterialProperty:
     """
     Calculate thermal diffusivity using the formula:
     alpha(T) = k(T) / (rho(T) * c_p(T))
-
-    Where:
-        - alpha(T) is the thermal diffusivity at temperature T.
-        - k(T) is the thermal conductivity at temperature T.
-        - rho(T) is the density at temperature T.
-        - c_p(T) is the specific heat capacity at temperature T.
-
-    :param heat_conductivity: Thermal conductivity. Can be a numeric value (float), numpy array (np.ndarray), or a symbolic expression (sp.Expr).
-    :param density: Density. Can be a numeric value (float), numpy array (np.ndarray), or a symbolic expression (sp.Expr).
-    :param heat_capacity: Specific heat capacity. Can be a numeric value (float), numpy array (np.ndarray), or a symbolic expression (sp.Expr).
-    :return: Thermal diffusivity. Returns a float if all inputs are numeric; otherwise, returns a numpy array or a sympy expression.
     """
-    # Input validation
-    for param, name in zip([heat_conductivity, density, heat_capacity], ['heat_conductivity', 'density', 'heat_capacity']):
-        if isinstance(param, (float, int)) and param <= 0:
-            raise ValueError(f"{name.capitalize()} must be positive.")
-        if isinstance(param, np.ndarray) and (param <= 0).any():
-            raise ValueError(f"{name.capitalize()} array contains non-positive values.")
-
-    if isinstance(density, sp.Piecewise) or isinstance(heat_conductivity, sp.Piecewise) or isinstance(heat_capacity, sp.Piecewise):
-        raise ValueError("Do not insert piecewise functions here. Prepare the data with corresponding temperature intervals.")
-
-    result = heat_conductivity / (density * heat_capacity)
-    return result
+    # Input validation to check for incompatible data types
+    incompatible_inputs = []
+    if isinstance(heat_conductivity, np.ndarray):
+        incompatible_inputs.append(f"heat_conductivity (type: {type(heat_conductivity)})")
+    if isinstance(density, np.ndarray):
+        incompatible_inputs.append(f"density (type: {type(density)})")
+    if isinstance(heat_capacity, np.ndarray):
+        incompatible_inputs.append(f"heat_capacity (type: {type(heat_capacity)})")
+    if incompatible_inputs:
+        raise TypeError(f"Incompatible input data type(s): {', '.join(incompatible_inputs)}")
+
+    sub_assignments = [
+        assignment for prop in [heat_conductivity, density, heat_capacity]
+        if isinstance(prop, MaterialProperty)
+        for assignment in prop.assignments
+    ]
+    # Handle the symbolic expression, using `.expr` only for MaterialProperty objects
+    k_expr = heat_conductivity.expr if isinstance(heat_conductivity, MaterialProperty) else Wrapper(heat_conductivity)
+    rho_expr = density.expr if isinstance(density, MaterialProperty) else Wrapper(density)
+    cp_expr = heat_capacity.expr if isinstance(heat_capacity, MaterialProperty) else Wrapper(heat_capacity)
+
+    result = k_expr / (rho_expr * cp_expr)
+    result = sp.simplify(result)  # computationally expensive
+    return MaterialProperty(result, sub_assignments)
 
 
 if __name__ == '__main__':
     # Test 1: Single temperature value for density calculation
-    T_0 = 800
-    T_1 = 1000
+    T_0 = 800.
+    T_1 = 1000.
     tec = 1e-6
-    rho = 8000
+    rho = 8000.
     print(f"Test 1 - Single temperature value: {T_1}, Density: {density_by_thermal_expansion(T_1, T_0, rho, tec)}")
     # Expected output: Density value at T_1 = 1000
 
     # Test 2: Temperature array for density calculation with constant TEC
     T_a = np.linspace(1000, 2000, 3)
     tec_a = np.ones(T_a.shape) * tec
+    print("-----", type(Wrapper(tec_a)))
     print(f"Test 2a - Temperature array with constant TEC: {T_a}, Density: {density_by_thermal_expansion(T_a, T_0, rho, tec)}")
     # Expected output: Array of densities for temperatures in T_a
 
     # Test 3: Temperature array for density calculation with array TEC
-    print(f"Test 2b - Temperature array with array TEC: {T_a}, Density: {density_by_thermal_expansion(T_a, T_0, rho, tec_a)}")
+    print(f"Test 2b - Temperature array with array TEC: {T_a}, Density: {density_by_thermal_expansion(T_a, T_0, rho, tec)}")
     # Expected output: Array of densities with temperature-dependent TEC
 
     # Test 4: Thermal diffusivity calculation with scalar values
-    k = 30
-    c_p = 600
+    k = 30.
+    c_p = 600.
     print(f"Test 3 - Thermal diffusivity with scalar values: {thermal_diffusivity_by_heat_conductivity(k, rho, c_p)}")
     # Expected output: Scalar value of thermal diffusivity
 
     # Test 5: Thermal diffusivity calculation with array values for heat conductivity
     k_a = np.linspace(30, 40, 3)
-    print(f"Test 4a - Thermal diffusivity with array of heat conductivity: {thermal_diffusivity_by_heat_conductivity(k_a, rho, c_p)}")
+    print(f"Test 4a - Thermal diffusivity with array of heat conductivity: {thermal_diffusivity_by_heat_conductivity(k, rho, c_p)}")
     # Expected output: Array of thermal diffusivity
 
     # Test 6: Thermal diffusivity with density calculated by thermal expansion
     calculated_densities = density_by_thermal_expansion(T_a, T_0, rho, tec)
-    print(f"Test 4b - Thermal diffusivity with calculated densities: {thermal_diffusivity_by_heat_conductivity(k_a, calculated_densities, c_p)}")
+    print(f"Test 4b - Thermal diffusivity with calculated densities: {thermal_diffusivity_by_heat_conductivity(k, calculated_densities, c_p)}")
     # Expected output: Array of thermal diffusivity considering temperature-dependent density
 
     # Test 7: Symbolic computation for density with sympy Symbol temperature
@@ -168,20 +161,6 @@ if __name__ == '__main__':
     except Exception as e:
         print(f"Error in thermal diffusivity calculation with mixed inputs: {str(e)}")
 
-    # Test case for all symbolic inputs
-    print("\nTest 10: All symbolic inputs")
-    T_sym = sp.Symbol('T')
-    tec_sym = sp.Symbol('alpha')
-    k_sym = sp.Symbol('k')
-    rho_sym = sp.Symbol('rho')
-    cp_sym = sp.Symbol('cp')
-
-    density_sym = density_by_thermal_expansion(T_sym, T_base, rho_base, tec_sym)
-    print(f"Symbolic density: {density_sym}")
-
-    diffusivity_sym = thermal_diffusivity_by_heat_conductivity(k_sym, rho_sym, cp_sym)
-    print(f"Symbolic thermal diffusivity: {diffusivity_sym}")
-
     # Test 11: Edge cases with zero and negative values
     print("\nTest 11: Edge cases with zero and negative values")
     try:
diff --git a/src/ray_tracer/__init__.py b/src/ray_tracer/__init__.py
index 57f07dd6671c593d9a4d36ea57839ff187b0a416..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 100644
--- a/src/ray_tracer/__init__.py
+++ b/src/ray_tracer/__init__.py
@@ -1,187 +0,0 @@
-import numpy as np
-
-# Constants and parameters
-pi = np.pi
-e = 1.60217662e-19  # Elementary charge
-atomic_number = 21.5
-
-# User-defined parameters
-beam_current = 3e-3  # Beam current in Amperes
-dwell_time = 0.4e-6  # Dwell time in seconds
-transimpedance = 1e5  # Transimpedance in Volts per Ampere
-
-# Geometry
-build_surface_width = 80e-3  # Build surface width in meters
-num_scan_lines = 1500  # Number of scan lines
-pixel_size = 53.3e-6  # Pixel size in meters
-detector_height = 272e-3  # Detector height in meters
-detector_spacing = 127e-3  # Detector spacing in meters
-
-# Scattering function parameters
-theta = np.deg2rad(0)
-k = np.deg2rad(theta / 13)  # Scattering lobe width parameter
-
-# Define domain size
-dx = 5e-6  # Cell size in meters
-dy = dx
-dz = dx
-x_min, x_max = -250e-6, 250e-6
-y_min, y_max = -250e-6, 250e-6
-z_min, z_max = 0, 20e-6
-x_num = int((x_max - x_min) / dx) + 1
-y_num = int((y_max - y_min) / dy) + 1
-z_num = int((z_max - z_min) / dz) + 1
-
-# Initialize the domain array
-domain = np.zeros((z_num, y_num, x_num))
-domain[:, :, 0:2] = 1  # Solid cells
-domain[:, :, 2] = 0.5  # Interface layer
-
-# Calculate gradients to determine surface normals
-fill_level = domain
-grad_x = np.gradient(fill_level, axis=2)
-grad_y = np.gradient(fill_level, axis=1)
-grad_z = np.gradient(fill_level, axis=0)
-n_s = np.stack((grad_x, grad_y, grad_z), axis=-1)
-norm = np.linalg.norm(n_s, axis=-1)
-
-interface_mask = (fill_level > 0) & (fill_level < 1)
-nonzero_mask = norm > 0
-valid_mask = interface_mask & nonzero_mask
-n_s[valid_mask] /= norm[valid_mask][..., np.newaxis]
-n_s[fill_level == 1] = np.array([0, 0, -1])
-n_s[fill_level == 0] = np.array([0, 0, 1])
-
-# Beam profile calculation
-sigma = 300e-6 / (2 * dx)
-x = np.arange(-30, 31)
-y = np.arange(-30, 31)
-x, y = np.meshgrid(x, y)
-beam_profile = np.exp(-(x ** 2 + y ** 2) / (2 * sigma ** 2))
-beam_profile /= np.sum(beam_profile)
-
-# Extend beam_profile to 3D to match the domain shape
-beam_profile_3d = np.tile(beam_profile[:, :, np.newaxis], (1, 1, z_num))
-
-# Extend beam profile to 3D to match domain shape
-'''beam_profile_3d = np.zeros((z_num, y_num, x_num))
-beam_profile_3d[z_num // 2 - beam_profile.shape[0] // 2:z_num // 2 + beam_profile.shape[0] // 2,
-y_num // 2 - beam_profile.shape[1] // 2:y_num // 2 + beam_profile.shape[1] // 2,
-x_num // 2] = beam_profile'''
-
-# Beam direction
-beam_dir = np.array([np.sin(theta), 0, -np.cos(theta)])
-
-# Define detector
-detector_distance = int(120e-3 / dx)
-detector_height_cells = int(300e-3 / dx)
-detector_width = 10
-detector_vertices = np.array([
-    [detector_distance, -detector_width / 2, detector_height_cells],
-    [detector_distance, detector_width / 2, detector_height_cells],
-    [detector_distance, 0, 0]
-])
-
-# Initialize arrays
-beam_interactions = np.zeros_like(domain)
-scattered_electrons = np.zeros((num_scan_lines, int(build_surface_width / pixel_size)))
-
-
-def compute_reflection_vector(beam_dir, surface_normal):
-    beam_dir = beam_dir / np.linalg.norm(beam_dir)
-    surface_normal = surface_normal / np.linalg.norm(surface_normal)
-    p_reflected = beam_dir - 2 * np.dot(beam_dir, surface_normal) * surface_normal
-    return p_reflected
-
-
-def bse_coefficient_0(A):
-    return -0.0254 + 0.016 * A - 1.86e-4 * A ** 2 + 8.3e-7 * A ** 3
-
-
-def bse_coefficient(theta, A):
-    B = 0.89
-    if theta == 0:
-        return bse_coefficient_0(A)
-    return B * (bse_coefficient_0(A) / B) ** np.cos(theta)
-
-
-def scattering_function_correction(theta):
-    return 1.01 - 4.06 * theta + 1.36e-5 * theta ** 2 - 1.71 * theta ** 3
-
-
-def scattering_function(theta, alpha, beta, A):
-    g_BSE = (bse_coefficient_0(A) / pi) * np.cos(alpha) + \
-            (((bse_coefficient(theta, A) - bse_coefficient_0(A)) / scattering_function_correction(theta)) * \
-             ((k + 1) / (2 * pi))) * np.cos(beta) ** k
-    return g_BSE
-
-
-# Iterate over the beam profile
-for profile_x in range(beam_profile.shape[0]):
-    for profile_y in range(beam_profile.shape[1]):
-        for profile_z in range(z_num):
-            domain_x = x_num // 2 - beam_profile.shape[0] // 2 + profile_x
-            domain_y = y_num // 2 - beam_profile.shape[1] // 2 + profile_y
-            if (0 <= domain_x < x_num) and (0 <= domain_y < y_num):
-                if beam_profile_3d[profile_x, profile_y, profile_z] > 0.001:
-                    beam_interactions[profile_z, domain_y, domain_x] += beam_profile_3d[profile_x, profile_y, profile_z]
-
-                    cell_center = np.array([domain_x, domain_y, profile_z]) * dx
-                    v_1 = detector_vertices[0] - cell_center
-                    v_2 = detector_vertices[1] - cell_center
-                    v_3 = detector_vertices[2] - cell_center
-
-                    omega = np.linalg.norm(np.cross(v_1, v_2)) + \
-                            np.linalg.norm(np.cross(v_2, v_3)) + \
-                            np.linalg.norm(np.cross(v_3, v_1))
-
-                    norm_surface_normal = n_s[profile_z, domain_y, domain_x]
-                    if norm_surface_normal.shape[0] == 5:
-                        norm_surface_normal = norm_surface_normal[0]
-
-                    alpha = np.arccos(np.dot(norm_surface_normal, beam_dir) /
-                                      (np.linalg.norm(norm_surface_normal) * np.linalg.norm(beam_dir)))
-                    p_reflected = compute_reflection_vector(beam_dir, norm_surface_normal)
-                    beta = np.arccos(np.dot(p_reflected, norm_surface_normal) /
-                                     (np.linalg.norm(p_reflected) * np.linalg.norm(norm_surface_normal)))
-
-                    g_value = scattering_function(theta, alpha, beta, atomic_number)
-                    N1_d = beam_interactions[profile_z, domain_y, domain_x] * g_value * omega
-                    scattered_electrons[:, domain_x] += N1_d
-
-
-# Ray tracing function
-def ray_tracing(build_surface, detectors, alpha, beta):
-    electrons_per_detector = np.zeros_like(detectors)
-    for i in range(build_surface.shape[0]):
-        for j in range(build_surface.shape[1]):
-            scattering_value = scattering_function(theta, alpha, beta, atomic_number)
-            bse_value = bse_coefficient(theta, atomic_number)
-            correction_value = scattering_function_correction(theta)
-            interaction = scattering_value * bse_value * correction_value * np.random.random()
-            for d in range(detectors.shape[0]):
-                electrons_per_detector[d, i, j] += interaction
-    return electrons_per_detector, scattering_value, bse_value, correction_value
-
-
-# Define parameters based on the paper
-alpha = 0.5  # Example value for alpha (related to Lambert reflection)
-beta = 0.5  # Example value for beta (related to the scattering lobe)
-
-# Initialize build surface and detectors
-build_surface = np.zeros((num_scan_lines, int(build_surface_width / pixel_size)))
-detectors = np.zeros((4, num_scan_lines, int(build_surface_width / pixel_size)))
-
-# Perform ray tracing
-electrons_per_detector, scattering_value, bse_value, correction_value = ray_tracing(build_surface, detectors, alpha,
-                                                                                    beta)
-
-# Convert detected electrons to voltage
-voltage_per_detector = electrons_per_detector * e * transimpedance
-
-# Log the results
-print(f"Scattering Value: {scattering_value}")
-print(f"BSE Coefficient: {bse_value}")
-print(f"Correction Value: {correction_value}")
-print(f"Electrons per detector:\n{electrons_per_detector}")
-print(f"Voltage per detector:\n{voltage_per_detector}")
diff --git a/src/ray_tracer/raytracerMM.py b/src/ray_tracer/raytracerMM.py
index eced7951d93ffb0b0a2a297e76eb12349fe17570..7dcb79dfd4820eb2ce486cd55f4ca87400e76623 100644
--- a/src/ray_tracer/raytracerMM.py
+++ b/src/ray_tracer/raytracerMM.py
@@ -1,5 +1,6 @@
 import numpy as np
 import sympy as sp
+# import matplotlib.pyplot as plt
 from typing import Tuple
 from src.materials.alloys.Alloy import Alloy
 from src.materials.alloys import Ti6Al4V
@@ -22,25 +23,28 @@ print('Ti6Al4V.composition: ', Ti6Al4V.composition)
 # print('SS316L.elements: ', SS316L.elements)
 # print('SS316L.composition: ', SS316L.composition)
 print('A: ', A)
-dx: float = 10e-6  # Cell size in meters
-dy: float = 10e-6  # Cell size in meters
-dz: float = 5e-6  # Cell size in meters
-sigma: float = 300e-6 / (2 * dx)  # Beam profile sigma
+dx: float = 1000e-6  # Cell size in meters
+dy: float = 1000e-6  # Cell size in meters
+dz: float = 1000e-6  # Cell size in meters
+sigma: float = 3000e-6 / (2 * dx)  # Beam profile sigma
 threshold: float = 0.0001  # Beam profile threshold
 # The voltage at detector d can be computed using the transimpedance T (in units of V /A ) of a virtual amplifier in the context of the model
 transimpedance: float = 1e5  # Transimpedance in Volts per Ampere
 eV_threshold = 50  # Threshold energy in eV
 threshold_energy = eV_threshold * e  # Threshold energy in joules
 print('threshold_energy: ', threshold_energy)
+# Define the work distance (height of the electron beam gun)
+w_d = 12e-3  # Work distance in meters (272 mm)
 
 # Domain setup (Section 2.1.1)
 # Define the 3D domain for the simulation
-x_min, x_max = -250e-6, 250e-6
-y_min, y_max = -250e-6, 250e-6
-z_min, z_max = 0, 20e-6
+x_min, x_max = -5000e-6, 5000e-6  # -500e-6, 500e-6
+y_min, y_max = -5000e-6, 5000e-6  # -250e-6, 250e-6
+z_min, z_max = 0, 4000e-6
 x_num = int((x_max - x_min) / dx) + 1
 y_num = int((y_max - y_min) / dy) + 1
 z_num = int((z_max - z_min) / dz) + 1
+print(f"Domain dimensions: x={x_num}, y={y_num}, z={z_num}")
 domain = np.zeros((z_num, y_num, x_num))  # (z, y , x)
 print('domain shape: ', np.shape(domain))
 domain[0:2, :, :] = 1  # Solid cells
@@ -84,17 +88,19 @@ print('interface_layer: ', interface_layer)
 
 # Beam profile (Section 2.1)
 # Define the Gaussian beam profile
-x = np.arange(-30, 31)
-y = np.arange(-30, 31)
+x = np.arange(-2, 3)
+y = np.arange(-2, 3)
 X, Y = np.meshgrid(x, y)
 beam_profile = np.exp(-(X ** 2 + Y ** 2) / (2 * sigma ** 2))
 beam_profile /= np.sum(beam_profile)  # Normalize beam profile
+print('beam_profile: ', beam_profile)
 
 # Detector setup (Section 2.1.1)
 # Define positions of the detectors
-detector_height = 272e-3  # Detector height in meters
-detector_spacing = 127e-3  # Detector spacing in meters
-detector_diameter = 50e-3  # Detector diameter in meters
+detector_height = 12e-3  # Detector height in meters (272 mm)
+detector_spacing = 5e-3  # Detector spacing in meters (127 mm)
+detector_diameter = 5e-3  # Detector diameter in meters (50 mm)
+
 detector_positions = np.array([
     [detector_spacing, 0, detector_height],  # Right detector
     [-detector_spacing, 0, detector_height],  # Left detector
@@ -103,6 +109,33 @@ detector_positions = np.array([
 ])
 
 
+
+
+# Define the origin coordinates
+origin_x = (x_num - 1) // 2  # Assuming x_num = 101, origin_x = 50
+print('origin_x: ',origin_x)
+origin_y = (y_num - 1) // 2  # Assuming y_num = 51, origin_y = 25
+print('origin_y: ',origin_y)
+origin_z = 2  # Assuming the origin is at z=2 (interface)
+
+origin_point = np.array([origin_x * dx, origin_y * dy, origin_z * dz])
+# origin_point = np.array([0, 0, 0])
+print('origin_x * dx: ', origin_x * dx)
+print("origin_point: ", origin_point)
+
+# Loop over detector positions
+for i in range(len(detector_positions)):
+    for j in range(i+1, len(detector_positions)):
+        dist = np.linalg.norm(detector_positions[i] - detector_positions[j])
+        print(f"Distance between detectors {i+1} and {j+1}: {dist}")
+
+    # Calculate distance from detector to origin
+    dist_to_origin = np.linalg.norm(detector_positions[i] - origin_point)
+    print(f"Distance from detector {i+1} to origin ({origin_x}, {origin_y}, {origin_z}): {dist_to_origin}")
+
+
+
+
 # Define detector vertices for solid angle calculation
 def get_detector_vertices(detector_position: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
     """
@@ -115,11 +148,13 @@ def get_detector_vertices(detector_position: np.ndarray) -> Tuple[np.ndarray, np
     tuple: The vertices of the detector surface.
     """
     # Calculate the side length of the detector
-    side_length = np.sqrt(3) * detector_diameter / 2
-    height = (np.sqrt(3) / 2) * side_length
+    side_length = np.sqrt(3) * (detector_diameter / 2)
+    height = np.sqrt(3) * (side_length / 2)
 
     # Determine the orientation based on the detector position
-    if detector_position[0] > 0:  # Right detector
+    if np.allclose(detector_position[:2], [0, 0]):
+        raise ValueError("Detector position cannot be at the origin (0, 0, 0)")
+    elif detector_position[0] > 0:  # Right detector
         _v1 = detector_position + np.array([2/3*height, 0, 0])
         _v2 = detector_position + np.array([-height/3, side_length/2, 0])
         _v3 = detector_position + np.array([-height/3, -side_length/2, 0])
@@ -131,10 +166,20 @@ def get_detector_vertices(detector_position: np.ndarray) -> Tuple[np.ndarray, np
         _v1 = detector_position + np.array([0, 2/3*height, 0])
         _v2 = detector_position + np.array([-side_length/2, -height/3, 0])
         _v3 = detector_position + np.array([side_length/2, -height/3, 0])
-    else:  # Front detector
+    elif detector_position[1] < 0:  # Front detector
         _v1 = detector_position + np.array([0, -2/3*height, 0])
         _v2 = detector_position + np.array([side_length/2, height/3, 0])
         _v3 = detector_position + np.array([-side_length/2, height/3, 0])
+    else:
+        raise ValueError("Invalid detector position. Must be in one of the four expected positions.")
+
+    print(f"Detector {det_idx+1} vertices:")
+    print(f"v1: {_v1}")
+    print(f"v2: {_v2}")
+    print(f"v3: {_v3}")
+    _normal = np.cross(_v2 - _v1, _v3 - _v1)
+    _normal /= np.linalg.norm(_normal)
+    print(f"Detector {det_idx+1} normal vector: {_normal}")
 
     return _v1, _v2, _v3
 
@@ -251,139 +296,227 @@ def g_BSE(_theta: float, _alpha: float, _beta: float) -> float:
     return diffusive_part #+ reflective_part
 
 
-# Beam direction
-phi: float = 0.0  # Angle in degrees (for phi > ~14.25°, C(theta) becomes negative)
-phi_rad: float = np.deg2rad(phi)  # Convert to radians
-p_E = np.array([np.sin(phi_rad), 0, -np.cos(phi_rad)])  # Beam direction vector
-p_E /= np.linalg.norm(p_E)
+# Calculate the beam direction vector p_E
+def calculate_p_E(_beam_x, _beam_y):
+    # Calculate the vector from the beam gun to the beam location
+    beam_vector = np.array([_beam_x * dx, _beam_y * dy, -w_d])
+    # Normalize the beam vector
+    _p_E = beam_vector / np.linalg.norm(beam_vector)
+    return _p_E
+
+
+# Calculate phi as the angle between p_E and the negative z-axis
+def calculate_phi(_p_E):
+    z_axis = np.array([0, 0, -1])
+    dot_product = np.dot(_p_E, z_axis)
+    _phi_rad = np.arccos(np.clip(dot_product, -1.0, 1.0))
+    return _phi_rad
 
-# Initialize total electrons counter and electrons per detector array
-total_electrons: int = 0
-electrons_per_detector = np.zeros(len(detector_positions))
-print('x_num: ', x_num)
+
+'''print('x_num: ', x_num)
 print('y_num: ', y_num)
-beam_loc = ((x_num+1) // 2, (y_num+1) // 2)  # (x, y)
+beam_loc = ((x_num-1) // 2, (y_num-1) // 2)  # (x, y)
 print('beam_loc: ', beam_loc)
-
-# loop over beam_loc from x_num // 4 to (x_num*3)//4
-#    compute phi depending on beam origin and beam_loc
-
-# Iterate over the beam profile
-for profile_x in range(beam_profile.shape[0]):
-    for profile_y in range(beam_profile.shape[1]):
-        # print(f"beam_profile.shape ({profile_x}, {profile_y}):")
-        domain_x = beam_loc[0] - beam_profile.shape[0] // 2 + profile_x
-        domain_y = beam_loc[1] - beam_profile.shape[1] // 2 + profile_y
-        # print(f"Domain ({domain_x}, {domain_y}):")
-        if (0 <= domain_x < x_num) and (0 <= domain_y < y_num):
-            if beam_profile[profile_x, profile_y] > threshold:
-                n_S_local = n_S[interface_layer, domain_y, domain_x]  # Interface layer
-
-                # Debugging print statements
-                print(f"grad_x: {grad_x[2, domain_y, domain_x]}")
-                print(f"grad_y: {grad_y[2, domain_y, domain_x]}")
-                print(f"grad_z: {grad_z[2, domain_y, domain_x]}")
-                print(f"n_S_local: {n_S_local}")
-
-                if not np.allclose(np.linalg.norm(n_S_local), 1.0):
-                    raise ValueError("n_S_local is not normalized.")
-
-                if np.allclose(n_S_local, np.array([0, 0, 1])) or np.allclose(n_S_local, np.array([0, 0, -1])):
-                    print('hellooo')
-                    theta_local: float = phi_rad
-                else:
-                    theta_local: float = np.arccos(
-                        np.clip(np.dot(p_E, n_S_local) / (np.linalg.norm(p_E) * np.linalg.norm(n_S_local)),
-                                -1.0, 1.0)
-                    )
-
-                for det_idx, det_pos in enumerate(detector_positions):
-                    det_idx: int
-                    det_pos: np.ndarray[int]
-
-                    det_vec = det_pos - np.array([domain_x * dx, domain_y * dx, 0])
-                    det_vec /= np.linalg.norm(det_vec)
-
-                    # Debugging print statements
-                    print(f"Detector Position: {det_pos}")
-                    print(f"Detector Position Type: {type(det_pos)}")
-                    print(f"Domain Point: {np.array([domain_x * dx, domain_y * dx, 0])}")
-                    print(f"Detector Vector: {det_vec}")
-                    print(f"Dot product: {np.dot(det_vec, n_S_local)}")
-
-                    alpha = np.arccos(np.clip(np.dot(det_vec, n_S_local), -1.0, 1.0))
-
-                    p_E_reflected = p_E - 2 * np.dot(p_E, n_S_local) * n_S_local
-                    p_E_reflected /= np.linalg.norm(p_E_reflected)
-
-                    beta = np.arccos(np.clip(np.dot(p_E_reflected, det_vec), -1.0, 1.0))
-
-                    # Apply energy threshold
-                    # electron_energy = N_0 * e * g_bse_value
-                    # print('electron_energy: ', electron_energy)
-
-                    # Calculate the initial energy of the primary electrons (E_0)
-                    V_acc = 60e3  # Accelerating voltage in Volts
-                    E_0 = V_acc * e  # Initial energy of primary electrons in joules
-
-                    # Calculate the energy of the backscatter electrons
-                    # E_BSE = E_0 * eta(theta_local, A)
-                    # Compute a weighted average BSE coefficient for the alloy
-                    E_BSE = E_0 * compute_weighted_eta(theta_local, Ti6Al4V)
-                    print('E_BSE: ', E_BSE)
-
-                    # Bifurcation logic for PE and SE
-                    if E_BSE > threshold_energy:
-                        # Backscatter electrons (BSE)
-                        # Compute scattering function g_bse_value
-                        g_bse_value = g_BSE(theta_local, alpha, beta)
-
-                        # Ensure g_bse_value is non-negative
-                        if g_bse_value < 0:
-                            raise ValueError("g_bse_value is negative, check calculations.")
-
-                        # Compute solid angle (Equation A1)
-                        v1, v2, v3 = get_detector_vertices(det_pos)
-                        print("Vertices:", v1, v2, v3)
-                        # print("Type of vertices:", type(v1), type(v2), type(v3))
-                        dOmega = compute_solid_angle(v1, v2, v3)
-
-                        # Compute number of electrons collected by the detector (Equation A2)
-                        # The beam ray stays on each grid point ij for the dwell time t_d and distributes N_0 PE into it
-                        N_0 = beam_current * dwell_time / e  # Equation 3, Section 2.1.1
-
-                        # There are N1 electrons in the chamber after the 1st scattering event
-                        # N_1 = N_0 * eta(theta_local, A)  # Equation 4, Section 2.1.1
-                        # Compute a weighted average BSE coefficient for the alloy
-                        N_1 = N_0 * compute_weighted_eta(theta_local, Ti6Al4V)  # Equation 4, Section 2.1.1
-
-                        # The number of electrons N_1_d collected by a general detector d
-                        N_1_d = N_1 * np.sum(g_bse_value * dOmega)  # Equation 5, Section 2.1.1
-
-                        # Accumulate electrons per detector
-                        electrons_per_detector[det_idx] += N_1_d
-
-                        # Add to total electrons
-                        total_electrons += N_1_d
+domain_center = ((x_num-1)/2, (y_num-1)/2)
+print(f"Domain center: {domain_center}")
+print(f"Is beam centered: {beam_loc == domain_center}")'''
+
+
+# Initialize arrays to store angles
+phi_angles = []
+theta_local_angles = []
+alpha_angles = []
+beta_angles = []
+
+# Initialize arrays to store electron counts
+electrons_per_detector_per_step = np.zeros((x_num, y_num, len(detector_positions)))
+total_electrons_per_step = np.zeros((x_num, y_num))
+
+# Calculate the center indices
+x_center = (x_num - 1) // 2  # 5
+y_center = (y_num - 1) // 2  # 5
+print('x_center, y_center: ', x_center, y_center)
+
+# Main simulation loop to iterate over the simulation domain
+# for beam_x in range(x_num//4, (x_num*3)//4+2):
+for beam_x in range(x_center, x_center + 1):
+    for beam_y in range(y_center, y_center + 1):
+        beam_loc = (beam_x, beam_y)
+        print(f"Beam location: {beam_loc}")
+
+        # Calculate the beam direction vector p_E
+        p_E = calculate_p_E(beam_x - x_center, beam_y - y_center)
+        print('p_E: ', p_E)
+        # dx = (beam_x - x_center) * dx
+        # dy = (beam_y - y_center) * dy
+
+        # Calculate phi as the angle between p_E and the negative z-axis
+        # phi_rad = np.arctan2(np.sqrt(dx**2 + dy**2), w_d)
+        phi_rad = calculate_phi(p_E)
+        phi_angles.append(phi_rad)  # Store phi angles
+        print(f"phi_rad: {phi_rad}")
+
+        # p_E = np.array([np.sin(phi_rad), 0, -np.cos(phi_rad)])
+        # print('p_E: ', p_E)
+
+        # Reset counters for this beam location
+        total_electrons: int = 0
+        electrons_per_detector = np.zeros(len(detector_positions))
+
+        # Iterate over the beam profile
+        for profile_x in range(beam_profile.shape[0]):
+            for profile_y in range(beam_profile.shape[1]):
+                print(f"beam_profile.shape ({profile_x}, {profile_y}):")
+                domain_x = beam_loc[0] - beam_profile.shape[0] // 2 + profile_x
+                domain_y = beam_loc[1] - beam_profile.shape[1] // 2 + profile_y
+                print(f"Domain ({domain_x}, {domain_y}):")
+                if (0 <= domain_x < x_num) and (0 <= domain_y < y_num):
+                    if beam_profile[profile_x, profile_y] > threshold:
+                        n_S_local = n_S[interface_layer, domain_y, domain_x]  # Interface layer
+                        if not np.allclose(np.linalg.norm(n_S_local), 1.0):
+                            raise ValueError("n_S_local is not normalized.")
 
                         # Debugging print statements
-                        print(f"Domain ({domain_x}, {domain_y}):")
-                        print(f"n_S_local: {n_S_local}")
-                        print(f"theta_local: {np.rad2deg(theta_local)}, "
-                              f"alpha: {np.rad2deg(alpha)}, "
-                              f"beta: {np.rad2deg(beta)}")
-                        print(f"det_vec: {det_vec}")
-                        print(f"p_E_reflected: {p_E_reflected}")
-                        print(f"g_bse_value: {g_bse_value}, N_0: {N_0}, N_1: {N_1}, N_1_d: {N_1_d}")
+                        '''print(f"grad_x: {grad_x[2, domain_y, domain_x]}")
+                        print(f"grad_y: {grad_y[2, domain_y, domain_x]}")
+                        print(f"grad_z: {grad_z[2, domain_y, domain_x]}")
+                        print(f"n_S_local: {n_S_local}")'''
+
+                        if np.allclose(n_S_local, np.array([0, 0, 1])) or np.allclose(n_S_local, np.array([0, 0, -1])):
+                            print('hellooo')
+                            theta_local: float = phi_rad
+                        else:
+                            theta_local: float = np.arccos(
+                                np.clip(np.dot(p_E, n_S_local) / (np.linalg.norm(p_E) * np.linalg.norm(n_S_local)),
+                                        -1.0, 1.0)
+                            )
+                        theta_local_angles.append(theta_local)  # Store theta_local angles
+
+                        scattering_point = np.array([
+                            x_min + domain_x * dx,
+                            y_min + domain_y * dy,
+                            z_min + interface_layer * dz
+                        ])
+                        print('scattering_point: ', scattering_point)
+
+                        for det_idx, det_pos in enumerate(detector_positions):
+                            print(f"Detector {det_idx+1} position: {det_pos}")
+                            det_idx: int
+                            det_pos: np.ndarray[int]
+
+                            # Calculate the vector from the scattering point to the detector
+                            det_vec = det_pos - scattering_point
+                            det_vec /= np.linalg.norm(det_vec)
+                            if not np.allclose(np.linalg.norm(det_vec), 1.0):
+                                raise ValueError("det_vec is not normalized.")
+
+                            # Calculate the distance from the scattering point to the detector
+                            distance = np.linalg.norm(det_pos - scattering_point)
+                            print(f"Distance to detector {det_idx + 1} ({domain_x}, {domain_y}): {distance:.9f} m")
+
+                            # Debugging print statements
+                            print(f"Detector {det_idx+1} position: {det_pos}")
+                            print(f"Detector Position Type: {type(det_pos)}")
+                            print(f"Scattering Point: {scattering_point}")
+                            print(f"Detector Vector (normalized): {det_vec}")
+                            print(f"Dot product: {np.dot(det_vec, n_S_local)}")
+
+                            alpha = np.arccos(np.clip(np.dot(det_vec, n_S_local), -1.0, 1.0))
+                            alpha_angles.append(alpha)  # Store alpha angles
+
+                            p_E_reflected = p_E - 2 * np.dot(p_E, n_S_local) * n_S_local
+                            p_E_reflected /= np.linalg.norm(p_E_reflected)
+
+                            beta = np.arccos(np.clip(np.dot(p_E_reflected, det_vec), -1.0, 1.0))
+                            beta_angles.append(beta)  # Store beta angles
+
+                            # Apply energy threshold
+                            # electron_energy = N_0 * e * g_bse_value
+                            # print('electron_energy: ', electron_energy)
+
+                            # Calculate the initial energy of the primary electrons (E_0)
+                            V_acc = 60e3  # Accelerating voltage in Volts
+                            E_0 = V_acc * e  # Initial energy of primary electrons in joules
+
+                            # Calculate the energy of the backscatter electrons
+                            # E_BSE = E_0 * eta(theta_local, A)
+                            # Compute a weighted average BSE coefficient for the alloy
+                            E_BSE = E_0 * compute_weighted_eta(theta_local, Ti6Al4V)
+                            print('E_BSE: ', E_BSE)
+
+                            # Bifurcation logic for PE and SE
+                            # if E_BSE > threshold_energy:
+                            # Backscatter electrons (BSE)
+                            # Compute scattering function g_bse_value
+                            g_bse_value = g_BSE(theta_local, alpha, beta)
+
+                            # Ensure g_bse_value is non-negative
+                            if g_bse_value < 0:
+                                raise ValueError("g_bse_value is negative, check calculations.")
+
+                            # Compute solid angle (Equation A1)
+                            v1, v2, v3 = get_detector_vertices(det_pos)
+                            normal = np.cross(v2 - v1, v3 - v1)
+                            normal /= np.linalg.norm(normal)
+                            print(f"Detector {det_idx+1}:")
+                            print(f"  Vertices: {v1}, {v2}, {v3}")
+                            print(f"  Normal vector: {normal}")
+                            # print("Type of vertices:", type(v1), type(v2), type(v3))
+                            dOmega = compute_solid_angle(v1 - scattering_point,
+                                                         v2 - scattering_point,
+                                                         v3 - scattering_point)
+                            print(f"Detector {det_idx+1} solid angle: {dOmega}")
+
+                            # Compute number of electrons collected by the detector (Equation A2)
+                            # The beam ray stays on each grid point ij for the dwell time t_d and distributes N_0 PE into it
+                            N_0 = beam_current * dwell_time / e  # Equation 3, Section 2.1.1
+
+                            # There are N1 electrons in the chamber after the 1st scattering event
+                            # N_1 = N_0 * eta(theta_local, A)  # Equation 4, Section 2.1.1
+                            # Compute a weighted average BSE coefficient for the alloy
+                            N_1 = N_0 * compute_weighted_eta(theta_local, Ti6Al4V)  # Equation 4, Section 2.1.1
+
+                            # The number of electrons N_1_d collected by a general detector d
+                            N_1_d = N_1 * np.sum(g_bse_value * dOmega)  # Equation 5, Section 2.1.1
+
+                            # Accumulate electrons per detector
+                            electrons_per_detector[det_idx] += N_1_d
+
+                            # Add to total electrons
+                            total_electrons += N_1_d
+
+                            # Debugging print statements
+                            '''print(f"Domain ({domain_x}, {domain_y}):")
+                            print(f"n_S_local: {n_S_local}")
+                            print(f"theta_local: {np.rad2deg(theta_local)}, "
+                                  f"alpha: {np.rad2deg(alpha)}, "
+                                  f"beta: {np.rad2deg(beta)}")
+                            print(f"det_vec: {det_vec}")
+                            print(f"p_E_reflected: {p_E_reflected}")
+                            print(f"g_bse_value: {g_bse_value}, N_0: {N_0}, N_1: {N_1}, N_1_d: {N_1_d}")'''
+                            # else:
+                            # Secondary electrons (SE)
+                            # Note: This part is currently neglected in the model
+                            # but can be implemented if necessary
+                            # continue  # Skip to the next iteration if energy is below threshold
+
+                        print(f"electrons_per_detector: {electrons_per_detector}")
                         print(f"total_electrons: {total_electrons}")
-                    else:
-                        # Secondary electrons (SE)
-                        # Note: This part is currently neglected in the model
-                        # but can be implemented if necessary
-                        continue  # Skip to the next iteration if energy is below threshold
 
+        # Store results for this beam location
+        electrons_per_detector_per_step[beam_x, beam_y] = electrons_per_detector
+        total_electrons_per_step[beam_x, beam_y] = total_electrons
+
+        print(f"Electrons per detector at this step: {electrons_per_detector}")
+        print(f"Total electrons at this step: {total_electrons}")
+
+
+# Compute final results
+total_electrons_all_steps = np.sum(total_electrons_per_step)
+total_electrons_per_detector = np.sum(electrons_per_detector_per_step, axis=(0, 1))
+
+# print(f"Beam location: {beam_loc}")
 
-print(f"Electrons per detector: {electrons_per_detector}")
-for i, electrons in enumerate(electrons_per_detector):
-    print(f"Electrons hitting detector {i + 1}: {np.sum(electrons)}")
-print(f"Total electrons hitting all detectors: {total_electrons}")
+print(f"Total electrons hitting all detectors: {total_electrons_all_steps}")
+for i, electrons in enumerate(total_electrons_per_detector):
+    print(f"Total electrons hitting detector {i + 1}: {electrons}")
diff --git a/src/ray_tracer/raytracer_plots.py b/src/ray_tracer/raytracer_plots.py
new file mode 100644
index 0000000000000000000000000000000000000000..53d639b7d0693b76c4af7b0a1a360062fd9ae15b
--- /dev/null
+++ b/src/ray_tracer/raytracer_plots.py
@@ -0,0 +1,1101 @@
+import numpy as np
+import sympy as sp
+import matplotlib.pyplot as plt
+import matplotlib.colors as colors
+from typing import Tuple
+from src.materials.alloys.Alloy import Alloy
+from src.materials.alloys import Ti6Al4V
+
+T = sp.Symbol('T')
+
+# Constants
+pi: float = np.pi  # Mathematical constant π
+e: float = 1.60217662e-19  # Elementary charge in Coulombs
+beam_current: float = 3e-3  # Beam current in Amperes
+dwell_time: float = 0.4e-6  # Dwell time in seconds
+# A: float = 21.5  # Approximation for Ti6Al4V
+Ti6Al4V = Ti6Al4V.create_Ti6Al4V(T)
+A: float = Ti6Al4V.atomic_number
+print('Ti6Al4V.elements: ', Ti6Al4V.elements)
+print('Ti6Al4V.composition: ', Ti6Al4V.composition)
+print('A: ', A)
+dx: float = 1000e-6  # Cell size in meters
+dy: float = 1000e-6  # Cell size in meters
+dz: float = 1000e-6  # Cell size in meters
+sigma: float = 3000e-6 / (2 * dx)  # Beam profile sigma
+threshold: float = 0.0001  # Beam profile threshold
+# The voltage at detector d can be computed using the transimpedance T (in units of V /A ) of a virtual amplifier in the context of the model
+transimpedance: float = 1e5  # Transimpedance in Volts per Ampere
+eV_threshold = 50  # Threshold energy in eV
+threshold_energy = eV_threshold * e  # Threshold energy in joules
+print('threshold_energy: ', threshold_energy)
+# Define the work distance (height of the electron beam gun)
+# w_d = 12e-3  # Work distance in meters (272 mm)
+
+# Domain setup (Section 2.1.1)
+# Define the 3D domain for the simulation
+x_min, x_max = -40000e-6, 40000e-6  # -40 mm to 40 mm
+y_min, y_max = -40000e-6, 40000e-6  # -40 mm to 40 mm
+z_min, z_max = -2000e-6, 2000e-6
+x_num = int((x_max - x_min) / dx) + 1
+y_num = int((y_max - y_min) / dy) + 1
+z_num = int((z_max - z_min) / dz) + 1
+print(f"Domain dimensions: x={x_num}, y={y_num}, z={z_num}")
+domain = np.zeros((z_num, y_num, x_num))  # (z, y , x)
+print('domain shape: ', np.shape(domain))
+domain[0:2, :, :] = 1  # Solid cells
+domain[2, :, :] = 0.5  # Interface layer
+
+
+def compute_surface_normals(_domain: np.ndarray) -> Tuple[np.ndarray, Tuple[np.ndarray, np.ndarray, np.ndarray], int]:
+    """
+    Compute surface normals from the domain.
+
+    Args:
+        _domain (ndarray): The 3D domain array.
+
+    Returns:
+        tuple: A tuple containing the surface normals array and the gradients (grad_x, grad_y, grad_z).
+    """
+    _grad_x = np.gradient(_domain, axis=2)  # x-direction
+    _grad_y = np.gradient(_domain, axis=1)  # y-direction
+    _grad_z = np.gradient(_domain, axis=0)  # z-direction
+    _n_S = np.stack((_grad_x, _grad_y, _grad_z), axis=-1)  # (z, y, x, 3)
+    norm = np.linalg.norm(_n_S, axis=-1)  # (z, y, x)
+    interface_mask = (_domain > 0) & (_domain < 1)
+    nonzero_mask = norm > 0
+    valid_mask = interface_mask & nonzero_mask
+
+    _n_S[valid_mask] /= norm[valid_mask][..., np.newaxis]
+    _n_S[valid_mask] *= -1  # Invert the surface normals to point towards the detectors
+
+    if not np.allclose(np.linalg.norm(_n_S[valid_mask], axis=-1), 1.0, atol=1e-8):
+        raise ValueError("n_S is not normalized.")
+
+    _interface_layer = np.argmax(np.any(interface_mask, axis=(1, 2)))
+
+    return _n_S, (_grad_x, _grad_y, _grad_z), _interface_layer
+
+
+# Compute surface normals (Section 2.1.1)
+n_S, (grad_x, grad_y, grad_z), interface_layer = compute_surface_normals(domain)
+print('np.shape(n_S): ', np.shape(n_S))  # (z, y, x, 3)
+print('interface_layer: ', interface_layer)
+
+# Beam profile (Section 2.1)
+# Define the Gaussian beam profile
+x = np.arange(-2, 3)
+y = np.arange(-2, 3)
+X, Y = np.meshgrid(x, y)
+beam_profile = np.exp(-(X ** 2 + Y ** 2) / (2 * sigma ** 2))
+beam_profile /= np.sum(beam_profile)  # Normalize beam profile
+print('beam_profile: ', beam_profile)
+
+# Detector setup (Section 2.1.1)
+# Define positions of the detectors
+detector_height = 272e-3  # Detector height in meters (272 mm)
+detector_spacing = 127e-3  # Detector spacing in meters (127 mm)
+detector_diameter = 50e-3  # Detector diameter in meters (50 mm)
+
+# Define detector positions relative to the origin
+detector_positions = np.array([
+    [detector_spacing, 0, detector_height],
+    [-detector_spacing, 0, detector_height],
+    [0, detector_spacing, detector_height],
+    [0, -detector_spacing, detector_height]
+])
+'''detector_positions = np.array([
+    [x_max, y_max / 2, detector_height],  # Right detector
+    [0, y_max / 2, detector_height],  # Left detector
+    [x_max / 2, y_max, detector_height],  # Back detector
+    [x_max / 2, 0, detector_height]  # Front detector
+])'''
+print('detector_positions:', detector_positions)
+
+# Define the origin coordinates
+'''origin_x = (x_num - 1) // 2  # Assuming x_num = 101, origin_x = 50
+print('origin_x: ', origin_x)
+origin_y = (y_num - 1) // 2  # Assuming y_num = 51, origin_y = 25
+print('origin_y: ', origin_y)
+origin_z = (z_num - 1) // 2  # Assuming the origin is at z=2 (interface)
+
+origin_point = np.array([origin_x * dx, origin_y * dy, origin_z * dz])
+# origin_point = np.array([0, 0, 0])
+print('origin_x * dx: ', origin_x * dx)
+print("origin_point: ", origin_point)
+
+# Loop over detector positions
+for i in range(len(detector_positions)):
+    for j in range(i + 1, len(detector_positions)):
+        dist = np.linalg.norm(detector_positions[i] - detector_positions[j])
+        print(f"Distance between detectors {i + 1} and {j + 1}: {dist}")
+
+    # Calculate distance from detector to origin
+    dist_to_origin = np.linalg.norm(detector_positions[i] - origin_point)
+    print(f"Distance from detector {i + 1} to origin ({origin_x}, {origin_y}, {origin_z}): {dist_to_origin}")'''
+
+
+# Define detector vertices for solid angle calculation
+def get_detector_vertices(detector_position: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """
+    Get the vertices of an equilateral triangle detector surface oriented symmetrically.
+
+    Parameters:
+    detector_position (array-like): The position of the center of the detector.
+
+    Returns:
+    tuple: The vertices of the detector surface.
+    """
+    # Calculate the side length of the detector
+    side_length = np.sqrt(3) * (detector_diameter / 2)
+    height = np.sqrt(3) * (side_length / 2)
+
+    # Determine the orientation based on the detector position
+    if np.allclose(detector_position[:2], [0, 0]):
+        raise ValueError("Detector position cannot be at the origin (0, 0, 0)")
+    elif detector_position[0] > 0:  # Right detector
+        _v1 = detector_position + np.array([2 / 3 * height, 0, 0])
+        _v2 = detector_position + np.array([-height / 3, side_length / 2, 0])
+        _v3 = detector_position + np.array([-height / 3, -side_length / 2, 0])
+    elif detector_position[0] < 0:  # Left detector
+        _v1 = detector_position + np.array([-2 / 3 * height, 0, 0])
+        _v2 = detector_position + np.array([height / 3, -side_length / 2, 0])
+        _v3 = detector_position + np.array([height / 3, side_length / 2, 0])
+    elif detector_position[1] > 0:  # Back detector
+        _v1 = detector_position + np.array([0, 2 / 3 * height, 0])
+        _v2 = detector_position + np.array([-side_length / 2, -height / 3, 0])
+        _v3 = detector_position + np.array([side_length / 2, -height / 3, 0])
+    elif detector_position[1] < 0:  # Front detector
+        _v1 = detector_position + np.array([0, -2 / 3 * height, 0])
+        _v2 = detector_position + np.array([side_length / 2, height / 3, 0])
+        _v3 = detector_position + np.array([-side_length / 2, height / 3, 0])
+    else:
+        raise ValueError("Invalid detector position. Must be in one of the four expected positions.")
+
+    '''print(f"Detector {det_idx+1} vertices:")
+    print(f"v1: {_v1}")
+    print(f"v2: {_v2}")
+    print(f"v3: {_v3}")
+    _normal = np.cross(_v2 - _v1, _v3 - _v1)
+    _normal /= np.linalg.norm(_normal)
+    print(f"Detector {det_idx+1} normal vector: {_normal}")'''
+
+    return _v1, _v2, _v3
+
+
+# Compute solid angle using Equation A1 (Appendix A)
+def compute_solid_angle(vector1: np.ndarray, vector2: np.ndarray, vector3: np.ndarray) -> float:
+    """
+    Compute the solid angle of a 3D triangle.
+
+    Args:
+        vector1: The first vector of the triangle.
+        vector2: The second vector of the triangle.
+        vector3: The third vector of the triangle.
+
+    Returns:
+        The solid angle of the triangle.
+    """
+    # Input validation
+    if not isinstance(vector1, np.ndarray) or not isinstance(vector2, np.ndarray) or not isinstance(vector3,
+                                                                                                    np.ndarray):
+        raise ValueError("Inputs must be numpy arrays")
+    if vector1.shape != (3,) or vector2.shape != (3,) or vector3.shape != (3,):
+        raise ValueError("Inputs must be 3D vectors")
+    if not np.isfinite(vector1).all() or not np.isfinite(vector2).all() or not np.isfinite(vector3).all():
+        raise ValueError("Inputs must be finite")
+
+    cross_product = np.cross(vector2, vector3)
+    # Check if the cross product is nearly zero
+    if np.linalg.norm(cross_product) < 1e-8:
+        raise ValueError("Vectors are nearly coplanar")
+
+    # Compute solid angle
+    try:
+        # Compute the numerator of the solid angle formula
+        numerator: float = np.dot(vector1, cross_product)
+        # Check if the numerator is nearly zero
+        if np.abs(numerator) < 1e-8:
+            raise ValueError("Vectors are nearly parallel")
+
+        # Compute the denominator of the solid angle formula
+        denominator: float = (
+                np.linalg.norm(vector1) * np.linalg.norm(vector2) * np.linalg.norm(vector3)
+                + np.dot(vector1, vector2) * np.linalg.norm(vector3)
+                + np.dot(vector1, vector3) * np.linalg.norm(vector2)
+                + np.dot(vector2, vector3) * np.linalg.norm(vector1)
+        )
+
+        # Compute the solid angle in radians
+        solid_angle = 2 * np.arctan2(numerator, denominator)
+        # print("Solid angle:", solid_angle)
+        return solid_angle
+    except ZeroDivisionError:
+        raise ValueError("Error computing solid angle: division by zero")
+    except np.linalg.LinAlgError:
+        raise ValueError("Error computing solid angle: linear algebra error")
+
+
+# BSE coefficient functions (Equations 11 and 10, Section 2.1.2)
+def eta_0(_A: float) -> float:
+    """Equation 11 from the paper"""
+    _eta_0: float = -0.0254 + 0.016 * _A - 1.86e-4 * _A ** 2 + 8.3e-7 * _A ** 3
+    # print('eta_0: ', _eta_0)
+    return _eta_0
+
+
+# BSE Coefficient for single atomic targets, found to hold in the range of 10–100 keV
+def eta(theta: float, _A: float) -> float:
+    """Equation 10 from the paper"""
+    B: float = 0.89
+    _eta: float = B * (eta_0(_A) / B) ** np.cos(theta)
+    # print('eta: ', _eta)
+    return _eta
+
+
+# Compute the weighted average BSE coefficient for the alloy
+def compute_weighted_eta(theta: float, alloy: Alloy) -> float:
+    """Equation 12 from the paper"""
+    weighted_eta: float = 0
+    for element, weight_fraction in zip(alloy.elements, alloy.composition):
+        # print('element: ', element)
+        # print('weight_fraction: ', weight_fraction)
+        _A: float = element.atomic_number
+        # print('A: ', _A)
+        eta_i: float = eta(theta, _A)
+        # print('eta_i: ', eta_i)
+        weighted_eta += weight_fraction * eta_i
+        # print('weighted_eta: ', weighted_eta)
+    return weighted_eta
+
+
+# Correction function C(theta) (Equation 14, Section 2.1.3)
+def C(theta: float) -> float:
+    """Equation 14 from the paper"""
+    theta = np.rad2deg(theta)  # Ensure angle is in degrees for C function
+    _C: float = 1.01 - 4.06 * theta + 1.36e-5 * theta ** 2 - 1.71e-6 * theta ** 3
+    # print('C: ', _C)
+    # Check if C is negative and raise an exception
+    if _C < 0:
+        raise ValueError(f"C function returned a negative value for theta: {theta}. "
+                         f"Please check the input and the C function implementation.")
+    return _C
+
+
+# Scattering function g_BSE (Equation 13, Section 2.1.3)
+def g_BSE(_theta: float, _alpha: float, _beta: float) -> float:
+    """Equation 13 from the paper"""
+    # k: float = np.rad2deg(_theta) / 13  # Correct implementation as per the paper
+    diffusive_part: float = (eta_0(A) / pi) * np.cos(_alpha)
+    # reflective_part: float = ((eta(_theta, A) - eta_0(A)) / C(_theta)) * ((k + 1) / (2 * pi)) * (np.cos(_beta) ** k)
+
+    # Compute a weighted average BSE coefficient for the alloy
+    # reflective_part: float = ((compute_weighted_eta(_theta, Ti6Al4V) - eta_0(A)) / C(_theta)) * ((k + 1) / (2 * pi)) * (np.cos(_beta) ** k)
+
+    # print('g_BSE: ', diffusive_part + reflective_part)
+    return diffusive_part  #+ reflective_part
+
+
+# Calculate the beam direction vector p_E
+def calculate_p_E(_beam_x, _beam_y):
+    # Calculate the vector from the beam gun to the beam location
+    beam_vector = np.array([_beam_x * dx, _beam_y * dy, -detector_height])
+    # Normalize the beam vector
+    _p_E = beam_vector / np.linalg.norm(beam_vector)
+    return _p_E
+
+
+# Calculate phi as the angle between p_E and the negative z-axis
+def calculate_phi(_p_E):
+    z_axis = np.array([0, 0, -1])
+    dot_product = np.dot(_p_E, z_axis)
+    _phi_rad = np.arccos(np.clip(dot_product, -1.0, 1.0))
+    return _phi_rad
+
+
+def set_beam_movement(_x_center, _y_center, _x_offset, _y_offset, _movement_type):
+    if _movement_type == 'X':
+        if _x_offset >= 0:
+            _beam_x_start = _x_center + _x_offset
+            _beam_x_end = _x_center - _x_offset - 1
+        else:
+            _beam_x_start = _x_center + _x_offset
+            _beam_x_end = _x_center - _x_offset + 1
+        _beam_y_start = _y_center + _y_offset
+        _beam_y_end = _y_center + _y_offset + 1
+    elif _movement_type == 'Y':
+        _beam_x_start = _x_center + _x_offset
+        _beam_x_end = _x_center + _x_offset + 1
+        if _y_offset >= 0:
+            _beam_y_start = _y_center + _y_offset
+            _beam_y_end = _y_center - _y_offset - 1
+        else:
+            _beam_y_start = _y_center + _y_offset
+            _beam_y_end = _y_center - _y_offset + 1
+    elif _movement_type == 'XY':
+        if _x_offset >= 0:
+            _beam_x_start = _x_center + _x_offset
+            _beam_x_end = _x_center - _x_offset - 1
+        else:
+            _beam_x_start = _x_center + _x_offset
+            _beam_x_end = _x_center - _x_offset + 1
+        if _y_offset >= 0:
+            _beam_y_start = _y_center + _y_offset
+            _beam_y_end = _y_center - _y_offset - 1
+        else:
+            _beam_y_start = _y_center + _y_offset
+            _beam_y_end = _y_center - _y_offset + 1
+    else:
+        raise ValueError("movement_type must be 'X' or 'Y' or 'XY'")
+
+    return _beam_x_start, _beam_x_end, _beam_y_start, _beam_y_end
+
+
+
+'''print('x_num: ', x_num)
+print('y_num: ', y_num)
+beam_loc = ((x_num-1) // 2, (y_num-1) // 2)  # (x, y)
+print('beam_loc: ', beam_loc)
+domain_center = ((x_num-1)/2, (y_num-1)/2)
+print(f"Domain center: {domain_center}")
+print(f"Is beam centered: {beam_loc == domain_center}")'''
+
+
+# Initialize arrays to store electron counts
+electrons_per_detector_per_step = np.zeros((x_num, y_num, len(detector_positions)))
+total_electrons_per_step = np.zeros((x_num, y_num))
+
+# Additional arrays to capture detailed information
+electrons_per_detector_per_beam_profile = np.zeros(
+    (beam_profile.shape[0], beam_profile.shape[1], len(detector_positions)))
+total_electrons_per_beam_profile = np.zeros((beam_profile.shape[0], beam_profile.shape[1]))
+
+# Calculate the center indices
+x_center = (x_num - 1) // 2
+y_center = (y_num - 1) // 2
+
+x_offset: int = -5
+y_offset: int = 20
+movement_type: str = 'X'
+
+# Beam X-Movement
+# beam_x_start, beam_x_end = x_center - x_offset, x_center + x_offset + 1
+# beam_y_start, beam_y_end = y_center + y_offset, y_center + y_offset + 1
+
+# Beam Y-Movement
+# beam_x_start, beam_x_end = x_center + x_offset, x_center + x_offset + 1
+# beam_y_start, beam_y_end = y_center - y_offset, y_center + y_offset + 1
+
+# For X or Y Movement
+beam_x_start, beam_x_end, beam_y_start, beam_y_end = set_beam_movement(
+    x_center, y_center, x_offset, y_offset, movement_type)
+
+# Determine the step size based on the direction of movement
+step_x = 1 if beam_x_start <= beam_x_end else -1
+step_y = 1 if beam_y_start <= beam_y_end else -1
+
+print(beam_x_start, beam_x_end, beam_y_start, beam_y_end, step_x, step_y)
+
+# exit()
+
+# Main simulation loop to iterate over the simulation domain
+for beam_x in range(beam_x_start, beam_x_end, step_x):
+    for beam_y in range(beam_y_start, beam_y_end, step_y):
+        beam_loc = (beam_x, beam_y)
+        print(f"Beam location: {beam_loc}")
+
+        # Calculate the beam direction vector p_E
+        p_E = calculate_p_E(beam_x - x_center, beam_y - y_center)
+        # print('p_E: ', p_E)
+
+        # Calculate phi as the angle between p_E and the negative z-axis
+        # phi_rad = np.arctan2(np.sqrt(dx**2 + dy**2), w_d)
+        phi_rad = calculate_phi(p_E)
+        # print(f"phi_rad: {phi_rad}")
+
+        # Reset counters for this beam location
+        total_electrons: int = 0
+        electrons_per_detector = np.zeros(len(detector_positions))
+
+        # Iterate over the beam profile
+        for profile_x in range(beam_profile.shape[0]):
+            for profile_y in range(beam_profile.shape[1]):
+                print(f"Beam profile shape ({profile_x}, {profile_y}):")
+                domain_x = beam_loc[0] - beam_profile.shape[0] // 2 + profile_x
+                domain_y = beam_loc[1] - beam_profile.shape[1] // 2 + profile_y
+                print(f"Domain ({domain_x}, {domain_y}):")
+
+                if (0 <= domain_x < x_num) and (0 <= domain_y < y_num):
+                    if beam_profile[profile_x, profile_y] > threshold:
+                        n_S_local = n_S[interface_layer, domain_y, domain_x]  # Interface
+                        if not np.allclose(np.linalg.norm(n_S_local), 1.0):
+                            raise ValueError("n_S_local is not normalized.")
+
+                        # Debugging print statements
+                        '''print(f"grad_x: {grad_x[2, domain_y, domain_x]}")
+                        print(f"grad_y: {grad_y[2, domain_y, domain_x]}")
+                        print(f"grad_z: {grad_z[2, domain_y, domain_x]}")
+                        print(f"n_S_local: {n_S_local}")'''
+
+                        if np.allclose(n_S_local, np.array([0, 0, 1])) or np.allclose(n_S_local, np.array([0, 0, -1])):
+                            print('hellooo')
+                            theta_local: float = phi_rad
+                        else:
+                            theta_local: float = np.arccos(
+                                np.clip(np.dot(p_E, n_S_local) / (np.linalg.norm(p_E) * np.linalg.norm(n_S_local)),
+                                        -1.0, 1.0)
+                            )
+
+                        scattering_point = np.array([
+                            x_min + domain_x * dx,
+                            y_min + domain_y * dy,
+                            z_min + interface_layer * dz
+                        ])
+                        print('scattering_point: ', scattering_point)
+
+                        for det_idx, det_pos in enumerate(detector_positions):
+                            print(f"Detector {det_idx + 1} position: {det_pos}")
+                            det_idx: int
+                            det_pos: np.ndarray[float]
+
+                            # Calculate the vector from the scattering point to the detector
+                            det_vec = det_pos - scattering_point
+                            det_vec /= np.linalg.norm(det_vec)
+                            if not np.allclose(np.linalg.norm(det_vec), 1.0):
+                                raise ValueError("det_vec is not normalized.")
+
+                            # Calculate the distance from the scattering point to the detector
+                            distance = np.linalg.norm(det_pos - scattering_point)
+                            print(f"Distance to detector {det_idx + 1} ({domain_x}, {domain_y}): {distance:.9f} m")
+
+                            # Debugging print statements
+                            '''print(f"Detector {det_idx+1} position: {det_pos}")
+                            print(f"Detector Position Type: {type(det_pos)}")
+                            print(f"Scattering Point: {scattering_point}")
+                            print(f"Detector Vector (normalized): {det_vec}")
+                            print(f"Dot product: {np.dot(det_vec, n_S_local)}")'''
+
+                            alpha = np.arccos(np.clip(np.dot(det_vec, n_S_local), -1.0, 1.0))
+
+                            p_E_reflected = p_E - 2 * np.dot(p_E, n_S_local) * n_S_local
+                            p_E_reflected /= np.linalg.norm(p_E_reflected)
+
+                            beta = np.arccos(np.clip(np.dot(p_E_reflected, det_vec), -1.0, 1.0))
+
+                            # Apply energy threshold
+                            # electron_energy = N_0 * e * g_bse_value
+                            # print('electron_energy: ', electron_energy)
+
+                            # Calculate the initial energy of the primary electrons (E_0)
+                            V_acc = 60e3  # Accelerating voltage in Volts
+                            E_0 = V_acc * e  # Initial energy of primary electrons in joules
+
+                            # Calculate the energy of the backscatter electrons
+                            # E_BSE = E_0 * eta(theta_local, A)
+                            # Compute a weighted average BSE coefficient for the alloy
+                            E_BSE = E_0 * compute_weighted_eta(theta_local, Ti6Al4V)
+                            # print('E_BSE: ', E_BSE)
+
+                            # Bifurcation logic for PE and SE
+                            # if E_BSE > threshold_energy:
+                            # Backscatter electrons (BSE)
+                            # Compute scattering function g_bse_value
+                            g_bse_value = g_BSE(theta_local, alpha, beta)
+
+                            # Ensure g_bse_value is non-negative
+                            if g_bse_value < 0:
+                                raise ValueError("g_bse_value is negative, check calculations.")
+
+                            # Compute solid angle (Equation A1)
+                            v1, v2, v3 = get_detector_vertices(det_pos)
+                            normal = np.cross(v2 - v1, v3 - v1)
+                            normal /= np.linalg.norm(normal)
+                            '''print(f"Detector {det_idx+1}:")
+                            print(f"  Vertices: {v1}, {v2}, {v3}")
+                            print(f"  Normal vector: {normal}")'''
+
+                            dOmega = compute_solid_angle(v1 - scattering_point, v2 - scattering_point,
+                                                         v3 - scattering_point)
+                            print(f"Detector {det_idx + 1} solid angle: {dOmega}")
+
+                            # Compute number of electrons collected by the detector (Equation A2)
+                            # The beam ray stays on each grid point ij for the dwell time t_d and distributes N_0 PE into it
+                            N_0 = beam_current * dwell_time / e  # Equation 3, Section 2.1.1
+
+                            # There are N1 electrons in the chamber after the 1st scattering event
+                            # N_1 = N_0 * eta(theta_local, A)  # Equation 4, Section 2.1.1
+                            # Compute a weighted average BSE coefficient for the alloy
+                            N_1 = N_0 * compute_weighted_eta(theta_local, Ti6Al4V)  # Equation 4, Section 2.1.1
+
+                            # The number of electrons N_1_d collected by a general detector d
+                            N_1_d = N_1 * np.sum(g_bse_value * dOmega)  # Equation 5, Section 2.1.1
+
+                            # Accumulate electrons per detector for this beam profile point
+                            electrons_per_detector_per_beam_profile[profile_x, profile_y, det_idx] += N_1_d
+
+                            # Accumulate electrons per detector for this domain point
+                            electrons_per_detector_per_step[beam_x, beam_y, det_idx] += N_1_d
+
+                            # Add to total electrons for this beam profile point
+                            total_electrons_per_beam_profile[profile_x, profile_y] += N_1_d
+
+                            # Add to total electrons for this domain point
+                            total_electrons_per_step[beam_x, beam_y] += N_1_d
+
+                            # Debugging print statements
+                            print(f"Before addition: electrons_per_detector[{det_idx + 1}]: {electrons_per_detector[det_idx]}")
+                            print(f"N_0: {N_0}, N_1: {N_1}, g_bse_value: {g_bse_value}, dOmega: {dOmega}")
+                            print(f"N_1_d: {N_1_d}")
+
+                            # Accumulate electrons per detector
+                            electrons_per_detector[det_idx] += N_1_d
+
+                            # Add to total electrons
+                            total_electrons += N_1_d
+
+                            print(f"After addition: electrons_per_detector[{det_idx + 1}]: {electrons_per_detector[det_idx]}")
+
+                # Print detailed information for this beam profile point
+                print(f"Electrons per detector at this beam profile point ({profile_x}, {profile_y}): {electrons_per_detector_per_beam_profile[profile_x, profile_y]}")
+                print(f"Total electrons at this beam profile point ({profile_x}, {profile_y}): {total_electrons_per_beam_profile[profile_x, profile_y]}")
+
+        # Print detailed information for this domain point after iterating over the beam profile
+        print(f"Electrons per detector at this domain point ({beam_x}, {beam_y}): {electrons_per_detector_per_step[beam_x, beam_y]}")
+        print(f"Total electrons at this domain point ({beam_x}, {beam_y}): {total_electrons_per_step[beam_x, beam_y]}")
+
+# Compute final results
+total_electrons_all_steps = np.sum(total_electrons_per_step)
+total_electrons_per_detector = np.sum(electrons_per_detector_per_step, axis=(0, 1))
+
+# Print final results
+print(f"Total electrons hitting all detectors: {total_electrons_all_steps}")
+for i, electrons in enumerate(total_electrons_per_detector):
+    print(f"Total electrons hitting detector {i + 1}: {electrons}")
+
+for beam_x in range(beam_x_start, beam_x_end, step_x):
+    for beam_y in range(beam_y_start, beam_y_end, step_y):
+        print(f"Electrons per detector at this domain point ({beam_x}, {beam_y}): {electrons_per_detector_per_step[beam_x, beam_y]}")
+
+
+
+
+# Plot results
+# Define the domain points
+domain_points_x = np.arange(beam_x_start, beam_x_end, step_x)
+domain_points_y = np.arange(beam_y_start, beam_y_end, step_y)
+
+# Extract electron counts for each detector based on movement type
+if movement_type == 'X':
+    detector_1_electrons_x = electrons_per_detector_per_step[beam_x_start:beam_x_end:step_x, y_center+y_offset, 0].flatten()
+    detector_2_electrons_x = electrons_per_detector_per_step[beam_x_start:beam_x_end:step_x, y_center+y_offset, 1].flatten()
+    detector_3_electrons_x = electrons_per_detector_per_step[beam_x_start:beam_x_end:step_x, y_center+y_offset, 2].flatten()
+    detector_4_electrons_x = electrons_per_detector_per_step[beam_x_start:beam_x_end:step_x, y_center+y_offset, 3].flatten()
+
+    # Plot for detectors 1 and 2
+    plt.figure(figsize=(10, 6))
+    plt.plot(domain_points_x, detector_1_electrons_x, label='Detector 1', marker='o')
+    plt.plot(domain_points_x, detector_2_electrons_x, label='Detector 2', marker='s')
+    # Add annotations
+    for x, y in zip(domain_points_x, detector_1_electrons_x):
+        plt.annotate(f'{y:.2e}', (x, y), textcoords="offset points", xytext=(5,-15), ha='center', fontsize=8, rotation=45)
+    for x, y in zip(domain_points_x, detector_2_electrons_x):
+        plt.annotate(f'{y:.2e}', (x, y), textcoords="offset points", xytext=(5,-15), ha='center', fontsize=8, rotation=45)
+    plt.title("Electrons Captured by Detectors 1 and 2 (X-axis Movement)")
+    plt.xlabel("Domain Point (X-axis)")
+    plt.ylabel("Electrons Captured")
+    plt.legend()
+    plt.grid(True)
+    plt.tight_layout()
+    plt.savefig("detectors_1_2_x_axis.png", dpi=300)
+    plt.close()
+
+    # Plot for detectors 3 and 4
+    plt.figure(figsize=(10, 6))
+    plt.plot(domain_points_x, detector_3_electrons_x, label='Detector 3', marker='o')
+    plt.plot(domain_points_x, detector_4_electrons_x, label='Detector 4', marker='s')
+    # Add annotations
+    for x, y in zip(domain_points_x, detector_3_electrons_x):
+        plt.annotate(f'{y:.8e}', (x, y), textcoords="offset points", xytext=(5,-15), ha='center', fontsize=8, rotation=45)
+    for x, y in zip(domain_points_x, detector_4_electrons_x):
+        plt.annotate(f'{y:.8e}', (x, y), textcoords="offset points", xytext=(5,-15), ha='center', fontsize=8, rotation=45)
+    plt.title("Electrons Captured by Detectors 3 and 4 (X-axis Movement)")
+    plt.xlabel("Domain Point (X-axis)")
+    plt.ylabel("Electrons Captured")
+    plt.legend()
+    plt.grid(True)
+    plt.tight_layout()
+    plt.savefig("detectors_3_4_x_axis.png", dpi=300)
+    plt.close()
+
+    # Normalize the data by subtracting the minimum value
+    detector_3_normalized = detector_3_electrons_x - np.min(detector_3_electrons_x)
+    detector_4_normalized = detector_4_electrons_x - np.min(detector_4_electrons_x)
+
+    plt.figure(figsize=(12, 8))  # Increase figure size for better visibility
+
+    plt.plot(domain_points_x, detector_3_normalized, label='Detector 3', marker='o')
+    plt.plot(domain_points_x, detector_4_normalized, label='Detector 4', marker='s')
+
+    # Add annotations for Detector 3
+    for x, y in zip(domain_points_x, detector_3_normalized):
+        plt.annotate(f'{y:.2f}', (x, y), textcoords="offset points", xytext=(5,-15), ha='center', fontsize=8, rotation=45)
+    # Add annotations for Detector 4
+    for x, y in zip(domain_points_x, detector_4_normalized):
+        plt.annotate(f'{y:.2f}', (x, y), textcoords="offset points", xytext=(5,-15), ha='center', fontsize=8, rotation=45)
+
+    plt.title("Normalized Electrons Captured by Detectors 3 and 4 (X-axis Movement)")
+    plt.xlabel("Domain Point (X-axis)")
+    plt.ylabel("Electrons Captured (Normalized)")
+    plt.legend()
+    plt.grid(True)
+    plt.tight_layout()
+    plt.savefig("detectors_3_4_x_axis_normalized.png", dpi=300)
+    plt.close()
+
+elif movement_type == 'Y':
+    detector_1_electrons_y = electrons_per_detector_per_step[x_center+x_offset, beam_y_start:beam_y_end:step_y, 0].flatten()
+    detector_2_electrons_y = electrons_per_detector_per_step[x_center+x_offset, beam_y_start:beam_y_end:step_y, 1].flatten()
+    detector_3_electrons_y = electrons_per_detector_per_step[x_center+x_offset, beam_y_start:beam_y_end:step_y, 2].flatten()
+    detector_4_electrons_y = electrons_per_detector_per_step[x_center+x_offset, beam_y_start:beam_y_end:step_y, 3].flatten()
+
+    # Plot for detectors 1 and 2
+    plt.figure(figsize=(10, 6))
+    plt.plot(domain_points_y, detector_1_electrons_y, label='Detector 1', marker='o')
+    plt.plot(domain_points_y, detector_2_electrons_y, label='Detector 2', marker='s')
+    # Add annotations
+    for x, y in zip(domain_points_y, detector_1_electrons_y):
+        plt.annotate(f'{y:.8e}', (x, y), textcoords="offset points", xytext=(5,-15), ha='center', fontsize=8, rotation=45)
+    for x, y in zip(domain_points_y, detector_2_electrons_y):
+        plt.annotate(f'{y:.8e}', (x, y), textcoords="offset points", xytext=(5,-15), ha='center', fontsize=8, rotation=45)
+    plt.title("Electrons Captured by Detectors 1 and 2 (Y-axis Movement)")
+    plt.xlabel("Domain Point (Y-axis)")
+    plt.ylabel("Electrons Captured")
+    plt.legend()
+    plt.grid(True)
+    plt.tight_layout()
+    plt.savefig("detectors_1_2_y_axis.png", dpi=300)
+    plt.close()
+
+    # Normalize the data by subtracting the minimum value
+    detector_1_normalized = detector_1_electrons_y - np.min(detector_1_electrons_y)
+    detector_2_normalized = detector_2_electrons_y - np.min(detector_2_electrons_y)
+
+    plt.figure(figsize=(12, 8))  # Increase figure size for better visibility
+
+    plt.plot(domain_points_y, detector_1_normalized, label='Detector 1', marker='o')
+    plt.plot(domain_points_y, detector_2_normalized, label='Detector 2', marker='s')
+
+    # Add annotations for Detector 1
+    for x, y in zip(domain_points_y, detector_1_normalized):
+        plt.annotate(f'{y:.2f}', (x, y), textcoords="offset points", xytext=(5,-15), ha='center', fontsize=8, rotation=45)
+    # Add annotations for Detector 2
+    for x, y in zip(domain_points_y, detector_2_normalized):
+        plt.annotate(f'{y:.2f}', (x, y), textcoords="offset points", xytext=(5,-15), ha='center', fontsize=8, rotation=45)
+
+    plt.title("Normalized Electrons Captured by Detectors 1 and 2 (Y-axis Movement)")
+    plt.xlabel("Domain Point (Y-axis)")
+    plt.ylabel("Electrons Captured (Normalized)")
+    plt.legend()
+    plt.grid(True)
+    plt.tight_layout()
+    plt.savefig("detectors_1_2_y_axis_normalized.png", dpi=300)
+    plt.close()
+
+    # Plot for detectors 3 and 4
+    plt.figure(figsize=(10, 6))
+    plt.plot(domain_points_y, detector_3_electrons_y, label='Detector 3', marker='o')
+    plt.plot(domain_points_y, detector_4_electrons_y, label='Detector 4', marker='s')
+    # Add annotations
+    for x, y in zip(domain_points_y, detector_3_electrons_y):
+        plt.annotate(f'{y:.8e}', (x, y), textcoords="offset points", xytext=(5,-15), ha='center', fontsize=8, rotation=45)
+    for x, y in zip(domain_points_y, detector_4_electrons_y):
+        plt.annotate(f'{y:.8e}', (x, y), textcoords="offset points", xytext=(5,-15), ha='center', fontsize=8, rotation=45)
+    plt.title("Electrons Captured by Detectors 3 and 4 (Y-axis Movement)")
+    plt.xlabel("Domain Point (Y-axis)")
+    plt.ylabel("Electrons Captured")
+    plt.legend()
+    plt.grid(True)
+    plt.tight_layout()
+    plt.savefig("detectors_3_4_y_axis.png", dpi=300)
+    plt.close()
+
+elif movement_type == 'XY':
+    # Extract 2D data for XY movement
+    detector_1_electrons_xy = electrons_per_detector_per_step[beam_x_start:beam_x_end:step_x, beam_y_start:beam_y_end:step_y, 0]
+    detector_2_electrons_xy = electrons_per_detector_per_step[beam_x_start:beam_x_end:step_x, beam_y_start:beam_y_end:step_y, 1]
+    detector_3_electrons_xy = electrons_per_detector_per_step[beam_x_start:beam_x_end:step_x, beam_y_start:beam_y_end:step_y, 2]
+    detector_4_electrons_xy = electrons_per_detector_per_step[beam_x_start:beam_x_end:step_x, beam_y_start:beam_y_end:step_y, 3]
+
+    # Create meshgrid for x and y coordinates
+    X, Y = np.meshgrid(np.arange(beam_x_start, beam_x_end, step_x), np.arange(beam_y_start, beam_y_end, step_y))
+
+    # Plot for detectors 1 and 2 (XY movement)
+    '''plt.figure(figsize=(12, 8))
+    plt.pcolormesh(X, Y, detector_1_electrons_xy.T, shading='auto', cmap='viridis')
+    plt.colorbar(label='Electrons Captured (Detector 1)')
+    plt.title("Electrons Captured by Detector 1 (XY Movement)")
+    plt.xlabel("Domain Point (X-axis)")
+    plt.ylabel("Domain Point (Y-axis)")
+    plt.tight_layout()
+    plt.savefig("detector_1_xy_movement.png", dpi=300)
+    plt.close()
+
+    plt.figure(figsize=(12, 8))
+    plt.pcolormesh(X, Y, detector_2_electrons_xy.T, shading='auto', cmap='viridis')
+    plt.colorbar(label='Electrons Captured (Detector 2)')
+    plt.title("Electrons Captured by Detector 2 (XY Movement)")
+    plt.xlabel("Domain Point (X-axis)")
+    plt.ylabel("Domain Point (Y-axis)")
+    plt.tight_layout()
+    plt.savefig("detector_2_xy_movement.png", dpi=300)
+    plt.close()
+
+    # Plot for detectors 3 and 4 (XY movement)
+    plt.figure(figsize=(12, 8))
+    plt.pcolormesh(X, Y, detector_3_electrons_xy.T, shading='auto', cmap='viridis')
+    plt.colorbar(label='Electrons Captured (Detector 3)')
+    plt.title("Electrons Captured by Detector 3 (XY Movement)")
+    plt.xlabel("Domain Point (X-axis)")
+    plt.ylabel("Domain Point (Y-axis)")
+    plt.tight_layout()
+    plt.savefig("detector_3_xy_movement.png", dpi=300)
+    plt.close()
+
+    plt.figure(figsize=(12, 8))
+    plt.pcolormesh(X, Y, detector_4_electrons_xy.T, shading='auto', cmap='viridis')
+    plt.colorbar(label='Electrons Captured (Detector 4)')
+    plt.title("Electrons Captured by Detector 4 (XY Movement)")
+    plt.xlabel("Domain Point (X-axis)")
+    plt.ylabel("Domain Point (Y-axis)")
+    plt.tight_layout()
+    plt.savefig("detector_4_xy_movement.png", dpi=300)
+    plt.close()'''
+
+
+
+
+
+    # Plot for each detector
+    '''fig, axs = plt.subplots(2, 2, figsize=(12, 10))
+
+    # Detector 1
+    vmin = np.min(detector_1_electrons_xy)
+    vmax = np.max(detector_1_electrons_xy)
+    axs[0, 0].pcolormesh(X, Y, detector_1_electrons_xy, shading='auto', cmap='viridis', vmin=vmin, vmax=vmax, norm=None)
+    axs[0, 0].set_title("Electrons Captured by Detector 1")
+    axs[0, 0].set_xlabel("Domain Point (X-axis)")
+    axs[0, 0].set_ylabel("Domain Point (Y-axis)")
+    # plt.colorbar(label='Electrons Captured')
+    axs[0, 0].set_aspect('equal')
+
+    # Detector 2
+    axs[0, 1].pcolormesh(X, Y, detector_2_electrons_xy, shading='auto', cmap='viridis')
+    axs[0, 1].set_title("Electrons Captured by Detector 2")
+    axs[0, 1].set_xlabel("Domain Point (X-axis)")
+    axs[0, 1].set_ylabel("Domain Point (Y-axis)")
+    axs[0, 1].set_aspect('equal')
+
+    # Detector 3
+    axs[1, 0].pcolormesh(X, Y, detector_3_electrons_xy, shading='auto', cmap='viridis')
+    axs[1, 0].set_title("Electrons Captured by Detector 3")
+    axs[1, 0].set_xlabel("Domain Point (X-axis)")
+    axs[1, 0].set_ylabel("Domain Point (Y-axis)")
+    axs[1, 0].set_aspect('equal')
+
+    # Detector 4
+    axs[1, 1].pcolormesh(X, Y, detector_4_electrons_xy, shading='auto', cmap='viridis')
+    axs[1, 1].set_title("Electrons Captured by Detector 4")
+    axs[1, 1].set_xlabel("Domain Point (X-axis)")
+    axs[1, 1].set_ylabel("Domain Point (Y-axis)")
+    axs[1, 1].set_aspect('equal')
+
+    # Add colorbars
+    for ax in axs.flat:
+        fig.colorbar(plt.cm.ScalarMappable(cmap='viridis'), ax=ax, shrink=0.6)
+
+    # Layout so plots do not overlap
+    fig.tight_layout()
+
+    # Save the figure
+    plt.savefig("electron_heatmaps.png", dpi=300)
+    plt.close()'''
+
+
+
+
+    # Create a figure with multiple subplots
+    fig, axs = plt.subplots(2, 2, figsize=(16, 16))
+
+    # Plot for each detector
+    for i in range(4):
+        row = i // 2
+        col = i % 2
+        im = axs[row, col].pcolormesh(X, Y, electrons_per_detector_per_step[beam_x_start:beam_x_end:step_x, beam_y_start:beam_y_end:step_y, i].T, shading='auto', cmap='viridis', norm=colors.LogNorm())
+        axs[row, col].set_title(f"Electrons Captured by Detector {i+1}")
+        axs[row, col].set_xlabel("Domain Point (X-axis)")
+        axs[row, col].set_ylabel("Domain Point (Y-axis)")
+        fig.colorbar(im, ax=axs[row, col])
+
+    # Adjust layout and save
+    plt.tight_layout()
+    plt.savefig("_all_detectors_xy_movement_.png", dpi=300)
+    plt.close()
+
+    # Now let's create individual plots for each detector
+    for i in range(4):
+        plt.figure(figsize=(10, 8))
+        im = plt.pcolormesh(X, Y, electrons_per_detector_per_step[beam_x_start:beam_x_end:step_x, beam_y_start:beam_y_end:step_y, i].T, shading='auto', cmap='viridis', norm=colors.LogNorm())
+        plt.colorbar(im)
+        plt.title(f"Electrons Captured by Detector {i+1}")
+        plt.xlabel("Domain Point (X-axis)")
+        plt.ylabel("Domain Point (Y-axis)")
+        plt.savefig(f"_detector_{i+1}_xy_movement_.png", dpi=300)
+        plt.close()
+
+
+
+
+    detector_1_electrons_xy = electrons_per_detector_per_step[beam_x_start:beam_x_end, beam_y_start:beam_y_end, 0]
+    print(detector_1_electrons_xy)
+
+    # Create the plot
+    plt.figure(figsize=(10, 8))
+    im = plt.pcolormesh(X, Y, detector_1_electrons_xy, shading='auto', cmap='viridis', norm=colors.LogNorm())
+    plt.colorbar(im, label='Electrons Captured')
+    plt.title("Electrons Captured by Detector 1 (XY Movement)")
+    plt.xlabel("Domain Point (X-axis)")
+    plt.ylabel("Domain Point (Y-axis)")
+
+    # Set the correct extent for the plot
+    # plt.xlim(beam_x_start, beam_x_end - 1)
+    # plt.ylim(beam_y_start, beam_y_end - 1)
+
+    # Adjust color scale to highlight variations
+    '''vmin = np.min(detector_1_electrons_xy)
+    vmax = np.max(detector_1_electrons_xy)
+    plt.clim(float(vmin), float(vmax))'''
+
+    # Add text annotations for each cell
+    for i in range(detector_1_electrons_xy.shape[0]):
+        for j in range(detector_1_electrons_xy.shape[1]):
+            plt.text(beam_x_start + i, beam_y_start + j, f'{detector_1_electrons_xy[i, j]:.2f}',
+                     ha='center', va='center', fontsize=6, color='white')
+
+    plt.tight_layout()
+    plt.savefig("detector_1_xy_movement_corrected.png", dpi=300)
+    plt.close()
+
+    print(f"Min: {np.min(detector_1_electrons_xy)}, Max: {np.max(detector_1_electrons_xy)}")
+
+# --------------------------------------------------
+'''
+# Plot for detectors 1 and 2 (X-axis movement)
+plt.figure(figsize=(10, 6))
+plt.plot(domain_points_x, detector_1_electrons_x, label='Detector 1', marker='o')
+plt.plot(domain_points_x, detector_2_electrons_x, label='Detector 2', marker='s')
+# Add annotations for Detector 1
+for x, y in zip(domain_points_x, detector_1_electrons_x):
+    plt.annotate(f'{y:.8e}', (x, y), textcoords="offset points", xytext=(5,-15), ha='center', fontsize=8, rotation=45)
+# Add annotations for Detector 2
+for x, y in zip(domain_points_x, detector_2_electrons_x):
+    plt.annotate(f'{y:.8e}', (x, y), textcoords="offset points", xytext=(5,-15), ha='center', fontsize=8, rotation=45)
+plt.title("Electrons Captured by Detectors 1 and 2 (X-axis Movement)")
+plt.xlabel("Domain Point (X-axis)")
+plt.ylabel("Electrons Captured")
+plt.legend()
+plt.grid(True)
+plt.tight_layout()
+plt.savefig("detectors_1_2_x_axis.png", dpi=300)
+plt.close()
+
+# --------------------------------------------------
+
+# Plot for detectors 3 and 4 (X-axis movement)
+plt.figure(figsize=(10, 6))
+plt.plot(domain_points_x, detector_3_electrons_x, label='Detector 3', marker='o')
+plt.plot(domain_points_x, detector_4_electrons_x, label='Detector 4', marker='s')
+
+# Add annotations for Detector 3
+for x, y in zip(domain_points_x, detector_3_electrons_x):
+    plt.annotate(f'{y:.8e}', (x, y), textcoords="offset points", xytext=(5,-15), ha='center', fontsize=8, rotation=45)
+
+# Add annotations for Detector 4
+for x, y in zip(domain_points_x, detector_4_electrons_x):
+    plt.annotate(f'{y:.8e}', (x, y), textcoords="offset points", xytext=(5,-15), ha='center', fontsize=8, rotation=45)
+
+# Calculate y-axis limits
+y_min = min(min(detector_3_electrons_x), min(detector_4_electrons_x))
+y_max = max(max(detector_3_electrons_x), max(detector_4_electrons_x))
+y_range = y_max - y_min
+
+# Set y-axis limits to focus on the region of interest
+plt.ylim(y_min - 0.1*y_range, y_max + 0.1*y_range)
+
+# Use scientific notation for y-axis
+plt.ticklabel_format(axis='y', style='sci', scilimits=(0,0))
+
+plt.title("Electrons Captured by Detectors 3 and 4 (X-axis Movement)")
+plt.xlabel("Domain Point (X-axis)")
+plt.ylabel("Electrons Captured")
+plt.legend()
+plt.grid(True)
+plt.tight_layout()
+plt.savefig("detectors_3_4_x_axis.png", dpi=300)
+plt.close()
+
+
+# Normalize the data by subtracting the minimum value
+detector_3_normalized = detector_3_electrons_x - np.min(detector_3_electrons_x)
+detector_4_normalized = detector_4_electrons_x - np.min(detector_4_electrons_x)
+
+plt.figure(figsize=(12, 8))  # Increase figure size for better visibility
+
+plt.plot(domain_points_x, detector_3_normalized, label='Detector 3', marker='o')
+plt.plot(domain_points_x, detector_4_normalized, label='Detector 4', marker='s')
+
+# Add annotations for Detector 3
+for x, y in zip(domain_points_x, detector_3_normalized):
+    plt.annotate(f'{y:.2f}', (x, y), textcoords="offset points", xytext=(5,-15), ha='center', fontsize=8, rotation=45)
+
+# Add annotations for Detector 4
+for x, y in zip(domain_points_x, detector_4_normalized):
+    plt.annotate(f'{y:.2f}', (x, y), textcoords="offset points", xytext=(5,-15), ha='center', fontsize=8, rotation=45)
+
+plt.title("Normalized Electrons Captured by Detectors 3 and 4 (X-axis Movement)")
+plt.xlabel("Domain Point (X-axis)")
+plt.ylabel("Electrons Captured (Normalized)")
+plt.legend()
+plt.grid(True)
+plt.tight_layout()
+plt.savefig("detectors_3_4_x_axis_normalized.png", dpi=300)
+plt.close()
+
+# --------------------------------------------------
+
+# Plot for detectors 1 and 2 (Y-axis movement)
+plt.figure(figsize=(10, 6))
+plt.plot(domain_points_y, detector_1_electrons_y, label='Detector 1', marker='o')
+plt.plot(domain_points_y, detector_2_electrons_y, label='Detector 2', marker='s')
+
+# Add annotations for Detector 1
+for x, y in zip(domain_points_y, detector_1_electrons_y):
+    plt.annotate(f'{y:.8e}', (x, y), textcoords="offset points", xytext=(5,-15), ha='center', fontsize=8, rotation=45)
+
+# Add annotations for Detector 2
+for x, y in zip(domain_points_y, detector_2_electrons_y):
+    plt.annotate(f'{y:.8e}', (x, y), textcoords="offset points", xytext=(5,-15), ha='center', fontsize=8, rotation=45)
+
+plt.title("Electrons Captured by Detectors 1 and 2 (Y-axis Movement)")
+plt.xlabel("Domain Point (Y-axis)")
+plt.ylabel("Electrons Captured")
+plt.legend()
+plt.grid(True)
+plt.tight_layout()
+plt.savefig("detectors_1_2_y_axis.png", dpi=300)
+plt.close()
+
+
+# Normalize the data by subtracting the minimum value
+detector_1_normalized = detector_1_electrons_y - np.min(detector_1_electrons_y)
+detector_2_normalized = detector_2_electrons_y - np.min(detector_2_electrons_y)
+
+plt.figure(figsize=(12, 8))  # Increase figure size for better visibility
+
+plt.plot(domain_points_y, detector_1_normalized, label='Detector 1', marker='o')
+plt.plot(domain_points_y, detector_2_normalized, label='Detector 2', marker='s')
+
+# Add annotations for Detector 1
+for x, y in zip(domain_points_y, detector_1_normalized):
+    plt.annotate(f'{y:.2f}', (x, y), textcoords="offset points", xytext=(5,-15), ha='center', fontsize=8, rotation=45)
+
+# Add annotations for Detector 2
+for x, y in zip(domain_points_y, detector_2_normalized):
+    plt.annotate(f'{y:.2f}', (x, y), textcoords="offset points", xytext=(5,-15), ha='center', fontsize=8, rotation=45)
+
+plt.title("Normalized Electrons Captured by Detectors 1 and 2 (Y-axis Movement)")
+plt.xlabel("Domain Point (Y-axis)")
+plt.ylabel("Electrons Captured (Normalized)")
+plt.legend()
+plt.grid(True)
+plt.tight_layout()
+plt.savefig("detectors_1_2_y_axis_normalized.png", dpi=300)
+plt.close()
+
+# --------------------------------------------------
+
+# Plot for detectors 3 and 4 (Y-axis movement)
+plt.figure(figsize=(10, 6))
+plt.plot(domain_points_y, detector_3_electrons_y, label='Detector 3', marker='o')
+plt.plot(domain_points_y, detector_4_electrons_y, label='Detector 4', marker='s')
+
+# Add annotations for Detector 3
+for x, y in zip(domain_points_y, detector_3_electrons_y):
+    plt.annotate(f'{y:.8e}', (x, y), textcoords="offset points", xytext=(5,-15), ha='center', fontsize=8, rotation=45)
+
+# Add annotations for Detector 4
+for x, y in zip(domain_points_y, detector_4_electrons_y):
+    plt.annotate(f'{y:.8e}', (x, y), textcoords="offset points", xytext=(5,-15), ha='center', fontsize=8, rotation=45)
+
+plt.title("Electrons Captured by Detectors 3 and 4 (Y-axis Movement)")
+plt.xlabel("Domain Point (Y-axis)")
+plt.ylabel("Electrons Captured")
+plt.legend()
+plt.grid(True)
+plt.tight_layout()
+plt.savefig("detectors_3_4_y_axis.png", dpi=300)
+plt.close()
+'''
+# --------------------------------------------------
+
+'''
+# Create meshgrid for x and y coordinates
+X, Y = np.meshgrid(np.arange(beam_x_start, beam_x_end), np.arange(beam_y_start, beam_y_end))
+
+# Extract electron counts for each detector
+detector_1_electrons = electrons_per_detector_per_step[beam_x_start:beam_x_end, beam_y_start:beam_y_end, 0]
+detector_2_electrons = electrons_per_detector_per_step[beam_x_start:beam_x_end, beam_y_start:beam_y_end, 1]
+detector_3_electrons = electrons_per_detector_per_step[beam_x_start:beam_x_end, beam_y_start:beam_y_end, 2]
+detector_4_electrons = electrons_per_detector_per_step[beam_x_start:beam_x_end, beam_y_start:beam_y_end, 3]
+
+# Plot for detectors 1 and 2
+plt.figure(figsize=(12, 8))
+plt.pcolormesh(X, Y, detector_1_electrons, shading='auto', cmap='viridis')
+plt.colorbar(label='Electrons Captured (Detector 1)')
+plt.title("Electrons Captured by Detector 1")
+plt.xlabel("Domain Point (X-axis)")
+plt.ylabel("Domain Point (Y-axis)")
+plt.tight_layout()
+plt.savefig("detector_1_heatmap.png", dpi=300)
+plt.close()
+
+plt.figure(figsize=(12, 8))
+plt.pcolormesh(X, Y, detector_2_electrons, shading='auto', cmap='viridis')
+plt.colorbar(label='Electrons Captured (Detector 2)')
+plt.title("Electrons Captured by Detector 2")
+plt.xlabel("Domain Point (X-axis)")
+plt.ylabel("Domain Point (Y-axis)")
+plt.tight_layout()
+plt.savefig("detector_2_heatmap.png", dpi=300)
+plt.close()
+
+# Plot for detectors 3 and 4
+plt.figure(figsize=(12, 8))
+plt.pcolormesh(X, Y, detector_3_electrons, shading='auto', cmap='viridis')
+plt.colorbar(label='Electrons Captured (Detector 3)')
+plt.title("Electrons Captured by Detector 3")
+plt.xlabel("Domain Point (X-axis)")
+plt.ylabel("Domain Point (Y-axis)")
+plt.tight_layout()
+plt.savefig("detector_3_heatmap.png", dpi=300)
+plt.close()
+
+plt.figure(figsize=(12, 8))
+plt.pcolormesh(X, Y, detector_4_electrons, shading='auto', cmap='viridis')
+plt.colorbar(label='Electrons Captured (Detector 4)')
+plt.title("Electrons Captured by Detector 4")
+plt.xlabel("Domain Point (X-axis)")
+plt.ylabel("Domain Point (Y-axis)")
+plt.tight_layout()
+plt.savefig("detector_4_heatmap.png", dpi=300)
+plt.close()
+'''
diff --git a/tests/boundary/CMakeLists.txt b/tests/boundary/CMakeLists.txt
index 82ced321b3efd91cd261ad3567cf0b067a457775..8dd843d974f81d9e7ce4521f2f88604596bd618d 100644
--- a/tests/boundary/CMakeLists.txt
+++ b/tests/boundary/CMakeLists.txt
@@ -7,3 +7,14 @@
 
 waLBerla_compile_test( FILES BoundaryHandling.cpp )
 waLBerla_execute_test( NAME BoundaryHandling )
+
+if (WALBERLA_BUILD_WITH_PYTHON)
+
+    waLBerla_link_files_to_builddir( *.py )
+
+    waLBerla_compile_test( FILES TestShiftedPeriodicity.cpp DEPENDS blockforest field python_coupling )
+    waLBerla_execute_test( NAME TestShiftedPeriodicity1 COMMAND $<TARGET_FILE:TestShiftedPeriodicity> ${CMAKE_CURRENT_SOURCE_DIR}/TestShiftedPeriodicitySetup.py )
+    waLBerla_execute_test( NAME TestShiftedPeriodicity2 COMMAND $<TARGET_FILE:TestShiftedPeriodicity> ${CMAKE_CURRENT_SOURCE_DIR}/TestShiftedPeriodicitySetup.py PROCESSES 2 )
+    waLBerla_execute_test( NAME TestShiftedPeriodicity4 COMMAND $<TARGET_FILE:TestShiftedPeriodicity> ${CMAKE_CURRENT_SOURCE_DIR}/TestShiftedPeriodicitySetup.py PROCESSES 4 )
+
+endif()
\ No newline at end of file
diff --git a/tests/boundary/TestShiftedPeriodicity.cpp b/tests/boundary/TestShiftedPeriodicity.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2351a2f37896cbaaa8d725701b3c33f2dd2396d8
--- /dev/null
+++ b/tests/boundary/TestShiftedPeriodicity.cpp
@@ -0,0 +1,310 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file TestShiftedPeriodicity.cpp
+//! \ingroup boundary
+//! \author Helen Schottenhamml <helen.schottenhamml@fau.de>
+//
+//======================================================================================================================
+
+#include "blockforest/StructuredBlockForest.h"
+
+#include "core/cell/Cell.h"
+#include "core/cell/CellInterval.h"
+#include "core/debug/CheckFunctions.h"
+#include "core/logging/Initialization.h"
+#include "core/math/Vector3.h"
+#include "core/mpi/Environment.h"
+#include "core/mpi/MPIWrapper.h"
+#include "core/mpi/MPIManager.h"
+#include "core/mpi/Operation.h"
+#include "core/mpi/Reduce.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+
+#include "field/Layout.h"
+//#include "field/vtk/VTKWriter.h"
+
+#include "python_coupling/CreateConfig.h"
+
+#include "stencil/D3Q27.h"
+#include "stencil/Directions.h"
+
+#include <blockforest/Initialization.h>
+#include <boundary/ShiftedPeriodicity.h>
+#include <core/DataTypes.h>
+#include <core/debug/Debug.h>
+#include <core/debug/TestSubsystem.h>
+#include <field/AddToStorage.h>
+#include <field/GhostLayerField.h>
+#include <memory>
+#include <vector>
+
+namespace walberla {
+
+using Stencil_T = stencil::D3Q27;
+
+using ValueType_T = real_t;
+using Field_T = GhostLayerField< ValueType_T, 3 >;
+
+constexpr cell_idx_t fieldGhostLayers = 1;
+
+//////////
+// MAIN //
+//////////
+
+template< typename FieldType_T >
+class FieldInitialiser {
+
+ public:
+   FieldInitialiser( const std::weak_ptr< StructuredBlockForest > & sbf, const BlockDataID fieldId )
+   : sbf_(sbf), fieldId_(fieldId)
+   {}
+
+   void operator()() {
+
+
+      const auto blocks = sbf_.lock();
+      WALBERLA_ASSERT_NOT_NULLPTR(blocks)
+
+      for ( auto & block : *blocks )
+      {
+         // get field
+         auto * field = block.template getData<FieldType_T>(fieldId_);
+         WALBERLA_ASSERT_NOT_NULLPTR(field)
+
+         // get cell interval
+         auto ci = field->xyzSizeWithGhostLayer();
+
+         for (const auto& cell : ci) {
+
+            // get global coordinates
+            Cell globalCell;
+            blocks->transformBlockLocalToGlobalCell(globalCell, block, cell);
+
+            for (uint_t d = 0; d < FieldType_T::F_SIZE; ++d)
+            {
+               field->get(cell, d) = real_c(  (globalCell.x() + 2 * fieldGhostLayers)
+                                            * (globalCell.y() + 2 * fieldGhostLayers)
+                                            * (globalCell.z() + 2 * fieldGhostLayers)
+                                            * int_c(d + 1));
+            }
+         }
+      }
+
+   }
+
+ private:
+   std::weak_ptr< StructuredBlockForest > sbf_{};
+   const BlockDataID fieldId_{};
+
+};
+
+
+int main( int argc, char **argv ) {
+
+   const mpi::Environment env(argc, argv);
+
+   for (auto cfg = python_coupling::configBegin(argc, argv); cfg != python_coupling::configEnd(); ++cfg)
+   {
+      auto config = *cfg;
+      logging::configureLogging(config);
+
+      // create the domain, flag field and vector field (non-uniform initialisation)
+      auto blocks = blockforest::createUniformBlockGridFromConfig(config->getBlock("DomainSetup"), nullptr, true);
+
+      const auto fieldID = field::addToStorage< Field_T >(blocks, "test field", real_t(0), field::Layout::fzyx, fieldGhostLayers);
+      FieldInitialiser< Field_T > initialiser(blocks, fieldID);
+
+      // re-initialise fields
+      initialiser();
+
+      // create periodic shift boundary condition
+
+      const auto spConfig = config->getBlock("Boundaries").getBlock("ShiftedPeriodicity");
+      const uint_t shiftDir = spConfig.getParameter<uint_t>("shiftDir");
+      const int shiftValue = spConfig.getParameter<int>("shiftValue");
+      const uint_t normalDir = spConfig.getParameter<uint_t>("normalDir");
+
+      boundary::ShiftedPeriodicity<Field_T> shiftedPeriodicity(
+         blocks, fieldID, fieldGhostLayers, normalDir, shiftDir, shiftValue
+      );
+
+//      auto vtkOutput = field::createVTKOutput<Field_T>(fieldID, *blocks, "test_field", 1, fieldGhostLayers,
+//                                                         false, "vtk_out", "simulation_step", false, false);
+//      vtkOutput();
+
+      // apply boundary condition and standard communication
+      shiftedPeriodicity();
+
+//      vtkOutput();
+
+      /// compare values
+      // create local domain slices to compare values before and after shift
+
+      const uint_t remDir = 3 - shiftDir - normalDir;
+      const auto shift = shiftedPeriodicity.shift();
+
+      const uint_t shiftSize = blocks->getNumberOfCells(shiftDir);
+      const uint_t remSize   = blocks->getNumberOfCells(remDir);
+      const uint_t sizeSlice = shiftSize * remSize * Field_T::F_SIZE;
+
+      std::vector<ValueType_T> innerMin(sizeSlice, ValueType_T(0));
+      std::vector<ValueType_T> innerMax(sizeSlice, ValueType_T(0));
+      std::vector<ValueType_T> glMin(sizeSlice, ValueType_T(0));
+      std::vector<ValueType_T> glMax(sizeSlice, ValueType_T(0));
+
+      auto getIdx = [&remSize](const cell_idx_t shiftIdx, const cell_idx_t remIdx){
+         return shiftIdx * cell_idx_c(Field_T::F_SIZE * remSize)
+                + remIdx * cell_idx_c(Field_T::F_SIZE);
+      };
+
+      // fill slices for comparing values
+      for(auto & block : *blocks) {
+         const bool atMin = blocks->atDomainMinBorder(normalDir, block);
+         const bool atMax = blocks->atDomainMaxBorder(normalDir, block);
+         if(!atMin && !atMax)
+            continue;
+
+         auto * field = block.getData<Field_T>(fieldID);
+         WALBERLA_ASSERT_NOT_NULLPTR(field)
+
+         // fill innerMin and glMin
+         if(atMin) {
+            Vector3<int> dirVector{};
+            dirVector[normalDir] = -1;
+            const auto dir = stencil::vectorToDirection(dirVector);
+
+            CellInterval innerMinCI;
+            CellInterval glMinCI;
+            field->getSliceBeforeGhostLayer(dir, innerMinCI, fieldGhostLayers, false);
+            field->getGhostRegion(dir, glMinCI, fieldGhostLayers, false);
+
+            // fill inner min
+            for(const auto & cell : innerMinCI){
+               Cell globalCell;
+               blocks->transformBlockLocalToGlobalCell(globalCell, block, cell);
+
+               cell_idx_t idxShiftDir = globalCell[shiftDir] - shift[shiftDir];
+               if(idxShiftDir >= cell_idx_c(shiftSize)) idxShiftDir -= cell_idx_c(shiftSize);
+               if(idxShiftDir <= - int_c(fieldGhostLayers)) idxShiftDir += cell_idx_c(shiftSize);
+
+               const cell_idx_t idx = getIdx(idxShiftDir, cell_idx_c(globalCell[remDir]));
+
+               for(cell_idx_t f = 0; f < cell_idx_c(Field_T::F_SIZE); ++f) {
+                  WALBERLA_ASSERT(field->coordinatesValid(cell.x(), cell.y(), cell.z(), f))
+                  WALBERLA_ASSERT_LESS(idx + f, innerMin.size())
+
+                  innerMin[uint_c(idx + f)] = field->get(cell, f);
+               }
+            }
+
+            // fill gl min
+            for(const auto & cell : glMinCI){
+               Cell globalCell;
+               blocks->transformBlockLocalToGlobalCell(globalCell, block, cell);
+
+               const cell_idx_t idx = getIdx(cell_idx_c(globalCell[shiftDir]), globalCell[remDir]);
+
+               for(cell_idx_t f = 0; f < cell_idx_c(Field_T::F_SIZE); ++f) {
+                  WALBERLA_ASSERT(field->coordinatesValid(cell.x(), cell.y(), cell.z(), f))
+                  WALBERLA_ASSERT_LESS(idx + f, glMin.size())
+
+                  glMin[uint_c(idx + f)] = field->get(cell, f);
+               }
+            }
+         }
+
+         // fill innerMax and glMax
+         if(atMax) {
+            Vector3<int> dirVector{};
+            dirVector[normalDir] = 1;
+            const auto dir = stencil::vectorToDirection(dirVector);
+
+            CellInterval innerMaxCI;
+            CellInterval glMaxCI;
+            field->getSliceBeforeGhostLayer(dir, innerMaxCI, fieldGhostLayers, false);
+            field->getGhostRegion(dir, glMaxCI, fieldGhostLayers, false);
+
+            // fill inner max
+            for(const auto & cell : innerMaxCI){
+               Cell globalCell;
+               blocks->transformBlockLocalToGlobalCell(globalCell, block, cell);
+
+               cell_idx_t idxShiftDir = globalCell[shiftDir] + shift[shiftDir];
+               if(idxShiftDir >= cell_idx_c(shiftSize)) idxShiftDir -= cell_idx_c(shiftSize);
+               if(idxShiftDir <= - int_c(fieldGhostLayers)) idxShiftDir += cell_idx_c(shiftSize);
+
+               const cell_idx_t idx = getIdx(idxShiftDir, cell_idx_c(globalCell[remDir]));
+
+               for(cell_idx_t f = 0; f < cell_idx_c(Field_T::F_SIZE); ++f) {
+                  WALBERLA_ASSERT(field->coordinatesValid(cell.x(), cell.y(), cell.z(), f))
+                  WALBERLA_ASSERT_LESS(idx + f, innerMax.size())
+
+                  innerMax[uint_c(idx + f)] = field->get(cell, f);
+               }
+            }
+
+            // fill gl max
+            for(const auto & cell : glMaxCI){
+               Cell globalCell;
+               blocks->transformBlockLocalToGlobalCell(globalCell, block, cell);
+
+               const cell_idx_t idx = getIdx(cell_idx_c(globalCell[shiftDir]), cell_idx_c(globalCell[remDir]));
+
+               for(cell_idx_t f = 0; f < cell_idx_c(Field_T::F_SIZE); ++f) {
+                  WALBERLA_ASSERT(field->coordinatesValid(cell.x(), cell.y(), cell.z(), f))
+                  WALBERLA_ASSERT_LESS(idx + f, glMax.size())
+
+                  glMax[uint_c(idx + f)] = field->get(cell, f);
+               }
+            }
+         }
+
+
+      }
+
+      WALBERLA_MPI_SECTION() {
+
+         mpi::reduceInplace(innerMin, mpi::SUM);
+         mpi::reduceInplace(innerMax, mpi::SUM);
+         mpi::reduceInplace(glMin, mpi::SUM);
+         mpi::reduceInplace(glMax, mpi::SUM);
+
+      }
+
+      WALBERLA_ROOT_SECTION() {
+         for(uint_t i = 0; i < sizeSlice; ++i) {
+            WALBERLA_CHECK_FLOAT_EQUAL(innerMin[i], glMax[i])
+            WALBERLA_CHECK_FLOAT_EQUAL(innerMax[i], glMin[i])
+         }
+      }
+
+   }
+
+   return 0;
+}
+
+} // namespace walberla
+
+
+
+int main(int argc, char **argv) {
+
+   walberla::debug::enterTestMode();
+
+   return walberla::main(argc,argv);
+}
diff --git a/tests/boundary/TestShiftedPeriodicitySetup.py b/tests/boundary/TestShiftedPeriodicitySetup.py
new file mode 100644
index 0000000000000000000000000000000000000000..1ca890c22f56abb1449ad2f55d63c1350fa029b9
--- /dev/null
+++ b/tests/boundary/TestShiftedPeriodicitySetup.py
@@ -0,0 +1,43 @@
+import waLBerla as wlb
+
+
+class Scenario:
+    def __init__(self, normal_dir, shift_dir, shift_value, periodicity):
+        self.normal_dir = normal_dir
+        self.shift_dir = shift_dir
+        self.shift_value = shift_value
+        self.periodicity = tuple(periodicity)
+
+    @wlb.member_callback
+    def config(self):
+
+        return {
+            'DomainSetup': {
+                'blocks': (3, 3, 3),
+                'cellsPerBlock': (4, 4, 4),
+                'cartesianSetup': 0,
+                'periodic': self.periodicity,
+            },
+            'Boundaries': {
+                'ShiftedPeriodicity': {
+                    'shiftDir': self.shift_dir,
+                    'shiftValue': self.shift_value,
+                    'normalDir': self.normal_dir
+                }
+            },
+            'Logging': {
+                'logLevel': "Info"
+            }
+        }
+
+
+scenarios = wlb.ScenarioManager()
+
+for normal_dir in (0, 1, 2):
+    for shift_dir in (0, 1, 2):
+        if normal_dir == shift_dir:
+            continue
+        periodicity = 3 * [0]
+        periodicity[shift_dir] = 1
+        for shift_value in (-3, 0, 2, 5, 8, 15):
+            scenarios.add(Scenario(normal_dir, shift_dir, shift_value, periodicity))
diff --git a/tests/field/codegen/Poisson.py b/tests/field/codegen/Poisson.py
index 8e27d5f8dcf135c820d3de45d1a943205ac921ca..898f08d8d7324b17ef22ac8f2ca517c6ca2caf71 100644
--- a/tests/field/codegen/Poisson.py
+++ b/tests/field/codegen/Poisson.py
@@ -12,8 +12,8 @@ with CodeGeneration() as ctx:
 
     @ps.kernel
     def kernel_func():
-        src[0, 0] @= ((dy2 * (src[1, 0] + src[-1, 0]))
+        dst[0, 0] @= ((dy2 * (src[1, 0] + src[-1, 0]))
                       + (dx2 * (src[0, 1] + src[0, -1]))
                       - (rhs[0, 0] * dx2 * dy2)) / (2.0 * (dx2 + dy2))
 
-    generate_sweep(ctx, 'Poisson', kernel_func)
+    generate_sweep(ctx, 'Poisson', kernel_func, field_swaps=[(src, dst), ])
diff --git a/tests/gpu/CMakeLists.txt b/tests/gpu/CMakeLists.txt
index e760cca4db10704cbedfef89a4faca71631ea730..18dab47d95569fd7445f0811b100c94cb5315829 100644
--- a/tests/gpu/CMakeLists.txt
+++ b/tests/gpu/CMakeLists.txt
@@ -56,4 +56,14 @@ waLBerla_generate_target_from_python(NAME CodegenGeneratedGPUFieldPackInfo FILE
 waLBerla_compile_test( FILES codegen/GeneratedFieldPackInfoTestGPU.cpp
         DEPENDS blockforest core field CodegenGeneratedGPUFieldPackInfo )
 waLBerla_execute_test( NAME GeneratedFieldPackInfoTestGPU )
+
+    if (WALBERLA_BUILD_WITH_PYTHON)
+
+        waLBerla_link_files_to_builddir( *.py )
+
+        waLBerla_compile_test( FILES TestShiftedPeriodicityGPU.cpp DEPENDS gpu blockforest field python_coupling )
+        waLBerla_execute_test( NAME TestShiftedPeriodicityGPU COMMAND $<TARGET_FILE:TestShiftedPeriodicityGPU> ${CMAKE_CURRENT_SOURCE_DIR}/TestShiftedPeriodicitySetupGPU.py )
+
+    endif()
+
 endif()
diff --git a/tests/gpu/TestShiftedPeriodicityGPU.cpp b/tests/gpu/TestShiftedPeriodicityGPU.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..02cd9777e9535e214a18303b929ae9289fa3f562
--- /dev/null
+++ b/tests/gpu/TestShiftedPeriodicityGPU.cpp
@@ -0,0 +1,319 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file TestShiftedPeriodicityGPU.cpp
+//! \ingroup gpu
+//! \author Helen Schottenhamml <helen.schottenhamml@fau.de>
+//
+//======================================================================================================================
+
+#include "blockforest/StructuredBlockForest.h"
+
+#include "core/cell/Cell.h"
+#include "core/cell/CellInterval.h"
+#include "core/debug/CheckFunctions.h"
+#include "core/logging/Initialization.h"
+#include "core/math/Vector3.h"
+#include "core/mpi/Environment.h"
+#include "core/mpi/MPIWrapper.h"
+#include "core/mpi/MPIManager.h"
+#include "core/mpi/Operation.h"
+#include "core/mpi/Reduce.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+
+#include "field/Layout.h"
+#include "field/vtk/VTKWriter.h"
+
+#include "python_coupling/CreateConfig.h"
+
+#include "stencil/D3Q27.h"
+#include "stencil/Directions.h"
+
+#include <blockforest/Initialization.h>
+#include <gpu/ShiftedPeriodicity.h>
+#include <gpu/AddGPUFieldToStorage.h>
+#include <gpu/FieldCopy.h>
+#include <core/DataTypes.h>
+#include <core/debug/Debug.h>
+#include <core/debug/TestSubsystem.h>
+#include <field/AddToStorage.h>
+#include <field/GhostLayerField.h>
+#include <memory>
+#include <vector>
+
+namespace walberla {
+
+using Stencil_T = stencil::D3Q27;
+
+using ValueType_T = real_t;
+using Field_T = GhostLayerField< ValueType_T, 3 >;
+using GPUField_T = gpu::GPUField< ValueType_T >;
+
+constexpr cell_idx_t fieldGhostLayers = 1;
+
+//////////
+// MAIN //
+//////////
+
+template< typename FieldType_T >
+class FieldInitialiser {
+
+ public:
+   FieldInitialiser( const std::weak_ptr< StructuredBlockForest > & sbf, const BlockDataID fieldId )
+   : sbf_(sbf), fieldId_(fieldId)
+   {}
+
+   void operator()() {
+
+
+      const auto blocks = sbf_.lock();
+      WALBERLA_ASSERT_NOT_NULLPTR(blocks)
+
+      for ( auto & block : *blocks )
+      {
+         // get field
+         auto * field = block.template getData<FieldType_T>(fieldId_);
+         WALBERLA_ASSERT_NOT_NULLPTR(field)
+
+         // get cell interval
+         auto ci = field->xyzSizeWithGhostLayer();
+
+         for (const auto& cell : ci) {
+
+            // get global coordinates
+            Cell globalCell;
+            blocks->transformBlockLocalToGlobalCell(globalCell, block, cell);
+
+            for (uint_t d = 0; d < FieldType_T::F_SIZE; ++d)
+            {
+               field->get(cell, d) = real_c(  (globalCell.x() + 2 * fieldGhostLayers)
+                                            * (globalCell.y() + 2 * fieldGhostLayers)
+                                            * (globalCell.z() + 2 * fieldGhostLayers)
+                                            * int_c(d + 1));
+            }
+         }
+      }
+
+   }
+
+ private:
+   std::weak_ptr< StructuredBlockForest > sbf_{};
+   const BlockDataID fieldId_{};
+
+};
+
+
+int main( int argc, char **argv ) {
+
+   const mpi::Environment env(argc, argv);
+
+   for (auto cfg = python_coupling::configBegin(argc, argv); cfg != python_coupling::configEnd(); ++cfg)
+   {
+      auto config = *cfg;
+      logging::configureLogging(config);
+
+      // create the domain, flag field and vector field (non-uniform initialisation)
+      auto blocks = blockforest::createUniformBlockGridFromConfig(config->getBlock("DomainSetup"), nullptr, true);
+
+      const auto h_fieldID = field::addToStorage< Field_T >(blocks, "test field CPU", real_t(0), field::Layout::fzyx, fieldGhostLayers);
+      FieldInitialiser< Field_T > initialiser(blocks, h_fieldID);
+
+      const auto d_fieldID = gpu::addGPUFieldToStorage< Field_T >(blocks, h_fieldID, "test field GPU");
+
+      // re-initialise fields
+      initialiser();
+
+      // create periodic shift boundary condition
+
+      const auto spConfig = config->getBlock("Boundaries").getBlock("ShiftedPeriodicity");
+      const uint_t shiftDir = spConfig.getParameter<uint_t>("shiftDir");
+      const int shiftValue = spConfig.getParameter<int>("shiftValue");
+      const uint_t normalDir = spConfig.getParameter<uint_t>("normalDir");
+      ;
+      gpu::ShiftedPeriodicityGPU<GPUField_T> shiftedPeriodicity(
+         blocks, d_fieldID, fieldGhostLayers, normalDir, shiftDir, shiftValue
+      );
+
+//      auto vtkOutput = field::createVTKOutput<Field_T>(fieldID, *blocks, "test_field", 1, fieldGhostLayers,
+//                                                         false, "vtk_out", "simulation_step", false, false);
+//      vtkOutput();
+
+      // apply boundary condition and standard communication
+      shiftedPeriodicity();
+
+//      vtkOutput();
+
+      /// compare values
+
+      // copy resulting GPU to CPU
+      gpu::fieldCpy<Field_T, GPUField_T>(blocks, h_fieldID, d_fieldID);
+
+      // create local domain slices to compare values before and after shift
+
+      const uint_t remDir = 3 - shiftDir - normalDir;
+      const auto shift = shiftedPeriodicity.shift();
+
+      const uint_t shiftSize = blocks->getNumberOfCells(shiftDir);
+      const uint_t remSize   = blocks->getNumberOfCells(remDir);
+      const uint_t sizeSlice = shiftSize * remSize * Field_T::F_SIZE;
+
+      std::vector<ValueType_T> innerMin(sizeSlice, ValueType_T(0));
+      std::vector<ValueType_T> innerMax(sizeSlice, ValueType_T(0));
+      std::vector<ValueType_T> glMin(sizeSlice, ValueType_T(0));
+      std::vector<ValueType_T> glMax(sizeSlice, ValueType_T(0));
+
+      auto getIdx = [&remSize](const cell_idx_t shiftIdx, const cell_idx_t remIdx){
+         return shiftIdx * cell_idx_c(Field_T::F_SIZE * remSize)
+                + remIdx * cell_idx_c(Field_T::F_SIZE);
+      };
+
+      // fill slices for comparing values
+      for(auto & block : *blocks) {
+         const bool atMin = blocks->atDomainMinBorder(normalDir, block);
+         const bool atMax = blocks->atDomainMaxBorder(normalDir, block);
+         if(!atMin && !atMax)
+            continue;
+
+         auto * field = block.getData<Field_T>(h_fieldID);
+         WALBERLA_ASSERT_NOT_NULLPTR(field)
+
+         // fill innerMin and glMin
+         if(atMin) {
+            Vector3<int> dirVector{};
+            dirVector[normalDir] = -1;
+            const auto dir = stencil::vectorToDirection(dirVector);
+
+            CellInterval innerMinCI;
+            CellInterval glMinCI;
+            field->getSliceBeforeGhostLayer(dir, innerMinCI, fieldGhostLayers, false);
+            field->getGhostRegion(dir, glMinCI, fieldGhostLayers, false);
+
+            // fill inner min
+            for(const auto & cell : innerMinCI){
+               Cell globalCell;
+               blocks->transformBlockLocalToGlobalCell(globalCell, block, cell);
+
+               cell_idx_t idxShiftDir = globalCell[shiftDir] - shift[shiftDir];
+               if(idxShiftDir >= cell_idx_c(shiftSize)) idxShiftDir -= cell_idx_c(shiftSize);
+               if(idxShiftDir <= - int_c(fieldGhostLayers)) idxShiftDir += cell_idx_c(shiftSize);
+
+               const cell_idx_t idx = getIdx(idxShiftDir, cell_idx_c(globalCell[remDir]));
+
+               for(cell_idx_t f = 0; f < cell_idx_c(Field_T::F_SIZE); ++f) {
+                  WALBERLA_ASSERT(field->coordinatesValid(cell.x(), cell.y(), cell.z(), f))
+                  WALBERLA_ASSERT_LESS(idx + f, innerMin.size())
+
+                  innerMin[uint_c(idx + f)] = field->get(cell, f);
+               }
+            }
+
+            // fill gl min
+            for(const auto & cell : glMinCI){
+               Cell globalCell;
+               blocks->transformBlockLocalToGlobalCell(globalCell, block, cell);
+
+               const cell_idx_t idx = getIdx(cell_idx_c(globalCell[shiftDir]), globalCell[remDir]);
+
+               for(cell_idx_t f = 0; f < cell_idx_c(Field_T::F_SIZE); ++f) {
+                  WALBERLA_ASSERT(field->coordinatesValid(cell.x(), cell.y(), cell.z(), f))
+                  WALBERLA_ASSERT_LESS(idx + f, glMin.size())
+
+                  glMin[uint_c(idx + f)] = field->get(cell, f);
+               }
+            }
+         }
+
+         // fill innerMax and glMax
+         if(atMax) {
+            Vector3<int> dirVector{};
+            dirVector[normalDir] = 1;
+            const auto dir = stencil::vectorToDirection(dirVector);
+
+            CellInterval innerMaxCI;
+            CellInterval glMaxCI;
+            field->getSliceBeforeGhostLayer(dir, innerMaxCI, fieldGhostLayers, false);
+            field->getGhostRegion(dir, glMaxCI, fieldGhostLayers, false);
+
+            // fill inner max
+            for(const auto & cell : innerMaxCI){
+               Cell globalCell;
+               blocks->transformBlockLocalToGlobalCell(globalCell, block, cell);
+
+               cell_idx_t idxShiftDir = globalCell[shiftDir] + shift[shiftDir];
+               if(idxShiftDir >= cell_idx_c(shiftSize)) idxShiftDir -= cell_idx_c(shiftSize);
+               if(idxShiftDir <= - int_c(fieldGhostLayers)) idxShiftDir += cell_idx_c(shiftSize);
+
+               const cell_idx_t idx = getIdx(idxShiftDir, cell_idx_c(globalCell[remDir]));
+
+               for(cell_idx_t f = 0; f < cell_idx_c(Field_T::F_SIZE); ++f) {
+                  WALBERLA_ASSERT(field->coordinatesValid(cell.x(), cell.y(), cell.z(), f))
+                  WALBERLA_ASSERT_LESS(idx + f, innerMax.size())
+
+                  innerMax[uint_c(idx + f)] = field->get(cell, f);
+               }
+            }
+
+            // fill gl min
+            for(const auto & cell : glMaxCI){
+               Cell globalCell;
+               blocks->transformBlockLocalToGlobalCell(globalCell, block, cell);
+
+               const cell_idx_t idx = getIdx(cell_idx_c(globalCell[shiftDir]), cell_idx_c(globalCell[remDir]));
+
+               for(cell_idx_t f = 0; f < cell_idx_c(Field_T::F_SIZE); ++f) {
+                  WALBERLA_ASSERT(field->coordinatesValid(cell.x(), cell.y(), cell.z(), f))
+                  WALBERLA_ASSERT_LESS(idx + f, glMax.size())
+
+                  glMax[uint_c(idx + f)] = field->get(cell, f);
+               }
+            }
+         }
+
+
+      }
+
+      WALBERLA_MPI_SECTION() {
+
+         mpi::reduceInplace(innerMin, mpi::SUM);
+         mpi::reduceInplace(innerMax, mpi::SUM);
+         mpi::reduceInplace(glMin, mpi::SUM);
+         mpi::reduceInplace(glMax, mpi::SUM);
+
+      }
+
+      WALBERLA_ROOT_SECTION() {
+         for(uint_t i = 0; i < sizeSlice; ++i) {
+            WALBERLA_CHECK_FLOAT_EQUAL(innerMin[i], glMax[i])
+            WALBERLA_CHECK_FLOAT_EQUAL(innerMax[i], glMin[i])
+         }
+      }
+
+   }
+
+   return 0;
+}
+
+} // namespace walberla
+
+
+
+int main(int argc, char **argv) {
+
+   walberla::debug::enterTestMode();
+
+   return walberla::main(argc,argv);
+}
diff --git a/tests/gpu/TestShiftedPeriodicitySetupGPU.py b/tests/gpu/TestShiftedPeriodicitySetupGPU.py
new file mode 100644
index 0000000000000000000000000000000000000000..06443685aa1119fbe6391d89e64ae048e047cd93
--- /dev/null
+++ b/tests/gpu/TestShiftedPeriodicitySetupGPU.py
@@ -0,0 +1,40 @@
+import waLBerla as wlb
+
+
+class Scenario:
+    def __init__(self, normal_dir, shift_dir, shift_value, periodicity):
+        self.normal_dir = normal_dir
+        self.shift_dir = shift_dir
+        self.shift_value = shift_value
+        self.periodicity = tuple(periodicity)
+
+    @wlb.member_callback
+    def config(self):
+
+        return {
+            'DomainSetup': {
+                'blocks': (3, 3, 3),
+                'cellsPerBlock': (4, 4, 4),
+                'cartesianSetup': 0,
+                'periodic': self.periodicity,
+            },
+            'Boundaries': {
+                'ShiftedPeriodicity': {
+                    'shiftDir': self.shift_dir,
+                    'shiftValue': self.shift_value,
+                    'normalDir': self.normal_dir
+                }
+            }
+        }
+
+
+scenarios = wlb.ScenarioManager()
+
+for normal_dir in (0, 1, 2):
+    for shift_dir in (0, 1, 2):
+        if normal_dir == shift_dir:
+            continue
+        periodicity = 3 * [0]
+        periodicity[shift_dir] = 1
+        for shift_value in (-3, 0, 2, 5, 8, 15):
+            scenarios.add(Scenario(normal_dir, shift_dir, shift_value, periodicity))
diff --git a/tests/gpu/codegen/CudaPoisson.py b/tests/gpu/codegen/CudaPoisson.py
index 24e471a8d93fea83928cc6e5c9f3ece881a204e4..d9d4ce892556505f412f94a6af57821f49beb7f8 100644
--- a/tests/gpu/codegen/CudaPoisson.py
+++ b/tests/gpu/codegen/CudaPoisson.py
@@ -12,8 +12,8 @@ with CodeGeneration() as ctx:
 
     @ps.kernel
     def kernel_func():
-        src[0, 0] @= ((dy**2 * (src[1, 0] + src[-1, 0]))
+        dst[0, 0] @= ((dy**2 * (src[1, 0] + src[-1, 0]))
                       + (dx**2 * (src[0, 1] + src[0, -1]))
                       - (rhs[0, 0] * dx**2 * dy**2)) / (2 * (dx**2 + dy**2))
 
-    generate_sweep(ctx, 'PoissonGPU', kernel_func, target=ps.Target.GPU)
+    generate_sweep(ctx, 'PoissonGPU', kernel_func, field_swaps=[(src, dst), ], target=ps.Target.GPU)