diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 9422543f7b89c190ab78a03daa43dc227d626a8f..c4dc0ffe6292e737ea319162a0efd7106a1a32fb 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -51,6 +51,7 @@ stages: -DWALBERLA_BUILD_WITH_METIS=$WALBERLA_BUILD_WITH_METIS -DWALBERLA_BUILD_WITH_PARMETIS=$WALBERLA_BUILD_WITH_PARMETIS -DWALBERLA_BUILD_WITH_FFTW=$WALBERLA_BUILD_WITH_FFTW + -DWALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT=$WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT -DWALBERLA_ENABLE_GUI=$WALBERLA_ENABLE_GUI -DWALBERLA_BUILD_WITH_CODEGEN=$WALBERLA_BUILD_WITH_CODEGEN -DWALBERLA_STL_BOUNDS_CHECKS=$WALBERLA_STL_BOUNDS_CHECKS @@ -269,9 +270,9 @@ icx_2022_hybrid_dbg_sp: - cuda11 - docker -gcc_9_serial: +gcc_10_serial: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-9 + image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-10 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -293,9 +294,9 @@ gcc_9_serial: - cuda11 - docker -gcc_9_mpionly: +gcc_10_mpionly: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-9 + image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-10 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -315,9 +316,9 @@ gcc_9_mpionly: - cuda11 - docker -gcc_9_hybrid: +gcc_10_hybrid: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-9 + image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-10 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -336,9 +337,9 @@ gcc_9_hybrid: - cuda11 - docker -gcc_9_serial_dbg: +gcc_10_serial_dbg: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-9 + image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-10 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -361,9 +362,9 @@ gcc_9_serial_dbg: - cuda11 - docker -gcc_9_mpionly_dbg: +gcc_10_mpionly_dbg: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-9 + image: 
i10git.cs.fau.de:5005/walberla/buildenvs/gcc-10 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -384,9 +385,9 @@ gcc_9_mpionly_dbg: - cuda11 - docker -gcc_9_hybrid_dbg: +gcc_10_hybrid_dbg: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-9 + image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-10 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -403,9 +404,9 @@ gcc_9_hybrid_dbg: - cuda11 - docker -gcc_9_hybrid_dbg_sp: +gcc_10_hybrid_dbg_sp: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-9 + image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-10 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -428,9 +429,9 @@ gcc_9_hybrid_dbg_sp: - cuda11 - docker -gcc_10_serial: +gcc_11_serial: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-10 + image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-11 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -452,9 +453,9 @@ gcc_10_serial: - cuda11 - docker -gcc_10_mpionly: +gcc_11_mpionly: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-10 + image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-11 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -474,9 +475,9 @@ gcc_10_mpionly: - cuda11 - docker -gcc_10_hybrid: +gcc_11_hybrid: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-10 + image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-11 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -495,9 +496,9 @@ gcc_10_hybrid: - cuda11 - docker -gcc_10_serial_dbg: +gcc_11_serial_dbg: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-10 + image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-11 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -520,9 +521,9 @@ gcc_10_serial_dbg: - cuda11 - docker -gcc_10_mpionly_dbg: 
+gcc_11_mpionly_dbg: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-10 + image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-11 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -543,9 +544,9 @@ gcc_10_mpionly_dbg: - cuda11 - docker -gcc_10_hybrid_dbg: +gcc_11_hybrid_dbg: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-10 + image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-11 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -565,9 +566,9 @@ gcc_10_hybrid_dbg: - cuda11 - docker -gcc_10_hybrid_dbg_sp: +gcc_11_hybrid_dbg_sp: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-10 + image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-11 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -590,9 +591,9 @@ gcc_10_hybrid_dbg_sp: - cuda11 - docker -gcc_11_serial: +gcc_12_serial: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-11 + image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-12 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -614,9 +615,9 @@ gcc_11_serial: - cuda11 - docker -gcc_11_mpionly: +gcc_12_mpionly: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-11 + image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-12 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -636,9 +637,9 @@ gcc_11_mpionly: - cuda11 - docker -gcc_11_hybrid: +gcc_12_hybrid: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-11 + image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-12 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -657,9 +658,9 @@ gcc_11_hybrid: - cuda11 - docker -gcc_11_serial_dbg: +gcc_12_serial_dbg: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-11 + image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-12 before_script: - pip3 install 
lbmpy==1.3.3 jinja2 pytest - cd python @@ -682,9 +683,9 @@ gcc_11_serial_dbg: - cuda11 - docker -gcc_11_mpionly_dbg: +gcc_12_mpionly_dbg: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-11 + image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-12 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -705,9 +706,9 @@ gcc_11_mpionly_dbg: - cuda11 - docker -gcc_11_hybrid_dbg: +gcc_12_hybrid_dbg: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-11 + image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-12 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -727,9 +728,9 @@ gcc_11_hybrid_dbg: - cuda11 - docker -gcc_11_hybrid_dbg_sp: +gcc_12_hybrid_dbg_sp: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-11 + image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-12 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -752,14 +753,24 @@ gcc_11_hybrid_dbg_sp: - cuda11 - docker -gcc_12_serial: +gcc_13_serial: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-12 + image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-13 + before_script: + - pip3 install lbmpy==1.3.3 jinja2 pytest + - cd python + - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla + - pip3 list + - cd .. 
+ - CC=gcc CXX=g++ pip3 install cupy-cuda11x variables: WALBERLA_BUILD_WITH_CUDA: "ON" WALBERLA_BUILD_WITH_MPI: "OFF" WALBERLA_BUILD_WITH_OPENMP: "OFF" WALBERLA_BUILD_WITH_PARMETIS: "OFF" + WALBERLA_BUILD_WITH_CODEGEN: "ON" + WALBERLA_BUILD_WITH_PYTHON: "ON" + WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT: "ON" only: variables: - $ENABLE_NIGHTLY_BUILDS @@ -767,12 +778,22 @@ gcc_12_serial: - cuda11 - docker -gcc_12_mpionly: +gcc_13_mpionly: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-12 + image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-13 + before_script: + - pip3 install lbmpy==1.3.3 jinja2 pytest + - cd python + - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla + - pip3 list + - cd .. + - CC=gcc CXX=g++ pip3 install cupy-cuda11x variables: WALBERLA_BUILD_WITH_CUDA: "ON" WALBERLA_BUILD_WITH_OPENMP: "OFF" + WALBERLA_BUILD_WITH_CODEGEN: "ON" + WALBERLA_BUILD_WITH_PYTHON: "ON" + WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT: "ON" only: variables: - $ENABLE_NIGHTLY_BUILDS @@ -780,66 +801,116 @@ gcc_12_mpionly: - cuda11 - docker -gcc_12_hybrid: +gcc_13_hybrid: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-12 + image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-13 stage: pretest + before_script: + - pip3 install lbmpy==1.3.3 jinja2 pytest + - cd python + - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla + - pip3 list + - cd .. 
+ - CC=gcc CXX=g++ pip3 install cupy-cuda11x variables: WALBERLA_BUILD_WITH_CUDA: "ON" + WALBERLA_BUILD_WITH_CODEGEN: "ON" + WALBERLA_BUILD_WITH_PYTHON: "ON" + WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT: "ON" tags: - cuda11 - docker -gcc_12_serial_dbg: +gcc_13_serial_dbg: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-12 + image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-13 + before_script: + - pip3 install lbmpy==1.3.3 jinja2 pytest + - cd python + - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla + - pip3 list + - cd .. + - CC=gcc CXX=g++ pip3 install cupy-cuda11x variables: WALBERLA_BUILD_WITH_CUDA: "ON" WALBERLA_BUILD_WITH_MPI: "OFF" WALBERLA_BUILD_WITH_OPENMP: "OFF" WALBERLA_BUILD_WITH_PARMETIS: "OFF" CMAKE_BUILD_TYPE: "DebugOptimized" + WALBERLA_BUILD_WITH_CODEGEN: "ON" + WALBERLA_BUILD_WITH_PYTHON: "ON" + WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT: "ON" tags: - cuda11 - docker -gcc_12_mpionly_dbg: +gcc_13_mpionly_dbg: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-12 + image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-13 + before_script: + - pip3 install lbmpy==1.3.3 jinja2 pytest + - cd python + - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla + - pip3 list + - cd .. + - CC=gcc CXX=g++ pip3 install cupy-cuda11x variables: WALBERLA_BUILD_WITH_CUDA: "ON" CMAKE_BUILD_TYPE: "DebugOptimized" WALBERLA_BUILD_WITH_OPENMP: "OFF" + WALBERLA_BUILD_WITH_CODEGEN: "ON" + WALBERLA_BUILD_WITH_PYTHON: "ON" + WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT: "ON" tags: - cuda11 - docker -gcc_12_hybrid_dbg: +gcc_13_hybrid_dbg: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-12 + image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-13 + before_script: + - pip3 install lbmpy==1.3.3 jinja2 pytest + - cd python + - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla + - pip3 list + - cd .. 
+ - CC=gcc CXX=g++ pip3 install cupy-cuda11x variables: WALBERLA_BUILD_WITH_CUDA: "ON" CMAKE_BUILD_TYPE: "DebugOptimized" + WALBERLA_BUILD_WITH_CODEGEN: "ON" + WALBERLA_BUILD_WITH_PYTHON: "ON" + WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT: "ON" tags: - cuda11 - docker -gcc_12_hybrid_dbg_sp: +gcc_13_hybrid_dbg_sp: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-12 + image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-13 + before_script: + - pip3 install lbmpy==1.3.3 jinja2 pytest + - cd python + - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla + - pip3 list + - cd .. + - CC=gcc CXX=g++ pip3 install cupy-cuda11x variables: WALBERLA_BUILD_WITH_CUDA: "ON" CMAKE_BUILD_TYPE: "DebugOptimized" WALBERLA_DOUBLE_ACCURACY: "OFF" WALBERLA_BUILD_WITH_PARMETIS: "OFF" WALBERLA_BUILD_WITH_METIS: "OFF" + WALBERLA_BUILD_WITH_CODEGEN: "ON" + WALBERLA_BUILD_WITH_PYTHON: "ON" + WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT: "ON" tags: - cuda11 - docker -clang_12_serial: +clang_13_serial: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-12 + image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-13 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -861,9 +932,9 @@ clang_12_serial: - cuda11 - docker -clang_12_mpionly: +clang_13_mpionly: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-12 + image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-13 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -883,9 +954,9 @@ clang_12_mpionly: - cuda11 - docker -clang_12_hybrid: +clang_13_hybrid: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-12 + image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-13 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -904,9 +975,9 @@ clang_12_hybrid: - cuda11 - docker -clang_12_serial_dbg: +clang_13_serial_dbg: extends: .build_template - image: 
i10git.cs.fau.de:5005/walberla/buildenvs/clang-12 + image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-13 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -929,9 +1000,9 @@ clang_12_serial_dbg: - cuda11 - docker -clang_12_mpionly_dbg: +clang_13_mpionly_dbg: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-12 + image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-13 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -952,9 +1023,9 @@ clang_12_mpionly_dbg: - cuda11 - docker -clang_12_hybrid_dbg: +clang_13_hybrid_dbg: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-12 + image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-13 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -971,9 +1042,9 @@ clang_12_hybrid_dbg: - cuda11 - docker -clang_12_hybrid_dbg_sp: +clang_13_hybrid_dbg_sp: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-12 + image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-13 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -996,9 +1067,9 @@ clang_12_hybrid_dbg_sp: - cuda11 - docker -clang_13_serial: +clang_14_serial: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-13 + image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-14 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -1020,9 +1091,9 @@ clang_13_serial: - cuda11 - docker -clang_13_mpionly: +clang_14_mpionly: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-13 + image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-14 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -1042,9 +1113,9 @@ clang_13_mpionly: - cuda11 - docker -clang_13_hybrid: +clang_14_hybrid: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-13 + image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-14 
before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -1063,9 +1134,9 @@ clang_13_hybrid: - cuda11 - docker -clang_13_serial_dbg: +clang_14_serial_dbg: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-13 + image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-14 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -1088,9 +1159,9 @@ clang_13_serial_dbg: - cuda11 - docker -clang_13_mpionly_dbg: +clang_14_mpionly_dbg: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-13 + image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-14 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -1111,9 +1182,9 @@ clang_13_mpionly_dbg: - cuda11 - docker -clang_13_hybrid_dbg: +clang_14_hybrid_dbg: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-13 + image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-14 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -1133,9 +1204,9 @@ clang_13_hybrid_dbg: - cuda11 - docker -clang_13_hybrid_dbg_sp: +clang_14_hybrid_dbg_sp: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-13 + image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-14 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -1158,9 +1229,9 @@ clang_13_hybrid_dbg_sp: - cuda11 - docker -clang_14_serial: +clang_15_serial: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-14 + image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-15 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -1182,9 +1253,9 @@ clang_14_serial: - cuda11 - docker -clang_14_mpionly: +clang_15_mpionly: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-14 + image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-15 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -1204,9 +1275,9 @@ 
clang_14_mpionly: - cuda11 - docker -clang_14_hybrid: +clang_15_hybrid: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-14 + image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-15 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -1225,9 +1296,9 @@ clang_14_hybrid: - cuda11 - docker -clang_14_serial_dbg: +clang_15_serial_dbg: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-14 + image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-15 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -1250,9 +1321,9 @@ clang_14_serial_dbg: - cuda11 - docker -clang_14_mpionly_dbg: +clang_15_mpionly_dbg: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-14 + image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-15 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -1273,9 +1344,9 @@ clang_14_mpionly_dbg: - cuda11 - docker -clang_14_hybrid_dbg: +clang_15_hybrid_dbg: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-14 + image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-15 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -1295,9 +1366,9 @@ clang_14_hybrid_dbg: - cuda11 - docker -clang_14_hybrid_dbg_sp: +clang_15_hybrid_dbg_sp: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-14 + image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-15 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -1320,9 +1391,9 @@ clang_14_hybrid_dbg_sp: - cuda11 - docker -clang_15_serial: +clang_16_serial: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-15 + image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-16 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -1344,9 +1415,9 @@ clang_15_serial: - cuda11 - docker -clang_15_mpionly: +clang_16_mpionly: extends: .build_template - image: 
i10git.cs.fau.de:5005/walberla/buildenvs/clang-15 + image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-16 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -1366,9 +1437,9 @@ clang_15_mpionly: - cuda11 - docker -clang_15_hybrid: +clang_16_hybrid: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-15 + image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-16 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -1384,9 +1455,9 @@ clang_15_hybrid: - cuda11 - docker -clang_15_serial_dbg: +clang_16_serial_dbg: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-15 + image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-16 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -1406,9 +1477,9 @@ clang_15_serial_dbg: - cuda11 - docker -clang_15_mpionly_dbg: +clang_16_mpionly_dbg: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-15 + image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-16 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -1426,9 +1497,9 @@ clang_15_mpionly_dbg: - cuda11 - docker -clang_15_hybrid_dbg: +clang_16_hybrid_dbg: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-15 + image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-16 before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest - cd python @@ -1445,9 +1516,9 @@ clang_15_hybrid_dbg: - cuda11 - docker -clang_15_hybrid_dbg_sp: +clang_16_hybrid_dbg_sp: extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-15 + image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-16 stage: pretest before_script: - pip3 install lbmpy==1.3.3 jinja2 pytest @@ -1468,23 +1539,6 @@ clang_15_hybrid_dbg_sp: - cuda11 - docker - - -gcc_8_hybrid_dbg_noboost: - extends: .build_template - image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:8 - before_script: - - rm -rf /opt/boost /usr/include/boost - 
variables: - CMAKE_BUILD_TYPE: "DebugOptimized" - WALBERLA_BUILD_WITH_CUDA: "OFF" - WALBERLA_ENABLE_GUI: "OFF" - WALBERLA_BUILD_WITH_PYTHON: "OFF" - tags: - - docker - - - ############################################################################### ## ## ## STL Debug Build ## diff --git a/CMakeLists.txt b/CMakeLists.txt index 5eae2503b9bd414976b0cbfce5478d2af4ea4a54..1d21694af654ba0b996a8bbce0c3096ef6a125ab 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1276,12 +1276,6 @@ if ( WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT ) message( FATAL_ERROR "Compiler: ${CMAKE_CXX_COMPILER} Version: ${CMAKE_CXX_COMPILER_VERSION} does not support half precision" ) endif () - # Local host optimization - if ( NOT WALBERLA_OPTIMIZE_FOR_LOCALHOST ) - message( WARNING "[WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT] " - "You are not optimizing for localhost. You may encounter linker errors, or WORSE: silent incorrect fp16 arithmetic! Consider also enabling WALBERLA_OPTIMIZE_FOR_LOCALHOST!" ) - endif () # Local host check - endif () # Check if WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT is set ############################################################################################################################ diff --git a/cmake/FindOpenMesh.cmake b/cmake/FindOpenMesh.cmake index 4dbb7f7dd06fe02b5019b4a52e76287eafbc8651..76ca8682a1e6881a0564fb67234c8230362854eb 100644 --- a/cmake/FindOpenMesh.cmake +++ b/cmake/FindOpenMesh.cmake @@ -53,7 +53,7 @@ # #=========================================================================== -cmake_minimum_required(VERSION 3.3.0) +cmake_minimum_required(VERSION 3.5.0) #if already found via finder or simulated finder in openmesh CMakeLists.txt, skip the search IF (NOT OPENMESH_FOUND) diff --git a/python/lbmpy_walberla/additional_data_handler.py b/python/lbmpy_walberla/additional_data_handler.py index 8982ff04c926db34c3e2db5e599a5e44a9068450..daaab32fd40aff5f2150495bf4782ef61a282dae 100644 --- a/python/lbmpy_walberla/additional_data_handler.py +++ 
b/python/lbmpy_walberla/additional_data_handler.py @@ -159,7 +159,7 @@ class NoSlipLinearBouzidiAdditionalDataHandler(AdditionalDataHandler): @property def constructor_argument_name(self): - return "wallDistance" + return "wallDistanceBouzidi" @property def constructor_arguments(self): @@ -168,7 +168,7 @@ class NoSlipLinearBouzidiAdditionalDataHandler(AdditionalDataHandler): @property def initialiser_list(self): - return "elementInitialiser(wallDistance)," + return f"elementInitialiser({self.constructor_argument_name})," @property def additional_arguments_for_fill_function(self): @@ -207,7 +207,7 @@ class QuadraticBounceBackAdditionalDataHandler(AdditionalDataHandler): @property def constructor_argument_name(self): - return "wallDistance" + return "wallDistanceQuadraticBB" @property def constructor_arguments(self): @@ -216,7 +216,7 @@ class QuadraticBounceBackAdditionalDataHandler(AdditionalDataHandler): @property def initialiser_list(self): - return "elementInitialiser(wallDistance)," + return f"elementInitialiser({self.constructor_argument_name})," @property def additional_arguments_for_fill_function(self): diff --git a/src/core/DataTypes.h b/src/core/DataTypes.h index 4e7c019a86dcf0ce23fb7c8a9e66e6add8500bde..d6147c12be61a072f112e49de6e68001a6013abc 100644 --- a/src/core/DataTypes.h +++ b/src/core/DataTypes.h @@ -167,6 +167,7 @@ using real_t = double; using real_t = float; #endif +#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT /// Half precision support. Experimental. Use carefully. /// /// This feature is experimental, since it strictly depends on the underlying architecture and compiler support. @@ -174,7 +175,6 @@ using real_t = float; /// interchange. Arithmetic operations will likely involve casting to fp32 (C++ float) and truncation to fp16. /// Only bandwidth bound code may therefore benefit. None of this is guaranteed, and may change in the future. 
/// -#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT /// FIXME: (not really right) Clang version must be 15 or higher for x86 half precision support. /// FIXME: (not really right) GCC version must be 12 or higher for x86 half precision support. /// FIXME: (I don't know) Also support seems to require SSE, so ensure that respective instruction sets are enabled. @@ -202,7 +202,7 @@ using half = _Float16; // Another possible half precision format would be the one from Google Brain (bfloat16) with an 8 bit exponent and a 7 bit mantissa. // Compare https://i10git.cs.fau.de/ab04unyc/walberla/-/issues/23 using float16 = half; -#endif +#endif // WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT using float32 = float; using float64 = double; @@ -276,7 +276,7 @@ inline bool floatIsEqual( walberla::float16 lhs, walberla::float16 rhs, const wa const auto difference = lhs - rhs; return ( (difference < 0) ? -difference : difference ) < epsilon; } -#endif +#endif // WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT } // namespace walberla diff --git a/src/core/mpi/CMakeLists.txt b/src/core/mpi/CMakeLists.txt index 13cc3653bd41f03e1840abd09f71147eee3c57cd..2987e28c266bce6552659cce89f5ef9546927a76 100644 --- a/src/core/mpi/CMakeLists.txt +++ b/src/core/mpi/CMakeLists.txt @@ -19,11 +19,14 @@ target_sources( core MPIIO.h MPIManager.cpp MPIManager.h + MPIOperation.h MPITextFile.cpp MPITextFile.h + MPIWrapper.cpp MPIWrapper.h OpenMPBufferSystem.h OpenMPBufferSystem.impl.h + Operation.h RecvBuffer.h Reduce.h SendBuffer.h diff --git a/src/core/mpi/Datatype.h b/src/core/mpi/Datatype.h index f717cb6d94c661aec320a864c972dbba4a49d2ae..ac933bbe8c1e572520d0e67f75f8199c1adc8910 100644 --- a/src/core/mpi/Datatype.h +++ b/src/core/mpi/Datatype.h @@ -22,6 +22,7 @@ #pragma once #include "MPIWrapper.h" +#include "core/Abort.h" namespace walberla { @@ -43,6 +44,21 @@ namespace mpi { WALBERLA_MPI_SECTION() { MPI_Type_commit( &mpiDatatype_ ); } } + explicit Datatype(const uint_t byteSize) : 
mpiDatatype_(MPI_DATATYPE_NULL) + { + WALBERLA_MPI_SECTION() + { + if (MPI_Type_contiguous(int_c(byteSize), MPI_BYTE, &mpiDatatype_) != MPI_SUCCESS) + { + WALBERLA_ABORT("MPI_Type_contiguous " << typeid(mpiDatatype_).name() << " failed."); + } + if (MPI_Type_commit(&mpiDatatype_) != MPI_SUCCESS) + { + WALBERLA_ABORT("MPI_Type_commit " << typeid(mpiDatatype_).name() << " failed."); + } + } + } + void init( MPI_Datatype datatype ) { mpiDatatype_ = datatype; diff --git a/src/core/mpi/MPIManager.cpp b/src/core/mpi/MPIManager.cpp index a334bc16c4878cea58a1452cd083fba31d9d3c7e..c25ca1082277d89c8be486ae7a9c61350d6bfea0 100644 --- a/src/core/mpi/MPIManager.cpp +++ b/src/core/mpi/MPIManager.cpp @@ -119,6 +119,10 @@ void MPIManager::finalizeMPI() { WALBERLA_MPI_SECTION() { + /// Free the custom types and operators + customMPITypes_.clear(); + customMPIOperations_.clear(); + if (isMPIInitialized_ && !currentlyAborting_) { isMPIInitialized_ = false; diff --git a/src/core/mpi/MPIManager.h b/src/core/mpi/MPIManager.h index 9ba3fb4d04b8f6b0c7f1e2041454677b9156bf75..60ce4d8514e57bb2465a14dfa304cab73b3b8be6 100644 --- a/src/core/mpi/MPIManager.h +++ b/src/core/mpi/MPIManager.h @@ -18,23 +18,28 @@ //! \author Florian Schornbaum <florian.schornbaum@fau.de> //! \author Martin Bauer <martin.bauer@fau.de> //! \author Christian Godenschwager <christian.godenschwager@fau.de> +//! 
\author Michael Zikeli <michael.zikeli@fau.de> // //====================================================================================================================== #pragma once -#include "MPIWrapper.h" #include "core/DataTypes.h" #include "core/debug/Debug.h" #include "core/math/Uint.h" +#include "core/mpi/Datatype.h" +#include "core/mpi/MPIOperation.h" +#include "core/mpi/MPIWrapper.h" +#include "core/mpi/Operation.h" #include "core/singleton/Singleton.h" +#include <map> +#include <typeindex> namespace walberla { namespace mpi { - /** * Encapsulates MPI Rank/Communicator information * @@ -127,6 +132,87 @@ public: /// Indicates whether MPI-IO can be used with the current MPI communicator; certain versions of OpenMPI produce /// segmentation faults when using MPI-IO with a 3D Cartesian MPI communicator (see waLBerla issue #73) bool isCommMPIIOValid() const; + + /// Return the custom MPI_Datatype stored in 'customMPITypes_' and defined by the user and passed to 'commitCustomType'. + template< typename CType > + MPI_Datatype getCustomType() const + { + WALBERLA_MPI_SECTION() + { + return customMPITypes_.at(typeid(CType)).operator MPI_Datatype(); + } + WALBERLA_NON_MPI_SECTION() + { + WALBERLA_ABORT( "This should not be called, if waLBerla is compiled without MPI." ); + } + } + + /// Return the custom MPI_Op stored in 'customMPIOperation_' and defined by the user and passed to 'commitCustomOperation'. + template< typename CType > + MPI_Op getCustomOperation(mpi::Operation op) const + { + // FIXME the operation is actually type dependent but implementing this is not straightforward, + // compare comment at declaration of 'customMPIOperations_'. + WALBERLA_MPI_SECTION() + { + return customMPIOperations_.at(op).operator MPI_Op(); + } + WALBERLA_NON_MPI_SECTION() + { + WALBERLA_ABORT( "This should not be called, if waLBerla is compiled without MPI." 
); + } + } + //@} + //******************************************************************************************************************* + + //** Setter Functions ********************************************************************************************** + /*! \name Setter Function */ + //@{ + ///! \brief Initializes a custom MPI_Datatype and logs it in the customMPITypes_ map. + ///! \param argument The argument that is expected by the constructor of mpi::Datatype + /// At the point of creation 26.01.2024 this is either MPI_Datatype or const int. + template < typename CType, class ConstructorArgumentType > + void commitCustomType( ConstructorArgumentType& argument ) + { + WALBERLA_MPI_SECTION() + { + if (isMPIInitialized_ && !currentlyAborting_) + { + static_assert( std::is_same_v<ConstructorArgumentType, const int> || std::is_same_v<ConstructorArgumentType, MPI_Datatype>, + "mpi::Datatype has only an constructor for an int value or a MPI_Datatype." ); + [[maybe_unused]] auto worked = std::get< 1 >( customMPITypes_.try_emplace(typeid(CType), argument) ); + WALBERLA_ASSERT(worked, "Emplacement of type " << typeid(CType).name() << " did not work."); + } else { + WALBERLA_ABORT( "MPI must be initialized before an new MPI_Datatype can be committed." ); + } + } + WALBERLA_NON_MPI_SECTION() + { + WALBERLA_ABORT( "This should not be called, if waLBerla is compiled without MPI." ); + } + } + + ///! \brief Initializes a custom MPI_Op and logs it in the customMPIOperation map + ///! \param op A operator, e.g. SUM, MIN. + ///! \param fct The definition of the MPI_User_function used for this operator. 
+ template< typename CType > + void commitCustomOperation( mpi::Operation op, MPI_User_function* fct ) + { + WALBERLA_MPI_SECTION() + { + if (isMPIInitialized_ && !currentlyAborting_) + { + [[maybe_unused]] auto worked = std::get< 1 >(customMPIOperations_.try_emplace(op, fct)); + WALBERLA_ASSERT(worked, "Emplacement of operation " << typeid(op).name() << " of type " + << typeid(CType).name() << " did not work."); + } + else { WALBERLA_ABORT("MPI must be initialized before an new MPI_Op can be committed."); } + } + WALBERLA_NON_MPI_SECTION() + { + WALBERLA_ABORT( "This should not be called, if waLBerla is compiled without MPI." ); + } + } //@} //******************************************************************************************************************* @@ -163,6 +249,33 @@ private: // Singleton MPIManager() : comm_(MPI_COMM_NULL) { WALBERLA_NON_MPI_SECTION() { rank_ = 0; } } +/// It is possible to commit own datatypes to MPI, that are not part of the standard. One example would be float16. +/// With these maps, it is possible to track self defined MPI_Datatypes and MPI_Ops, to access them at any time and +/// place in the program, also, they are automatically freed once MPIManager::finalizeMPI is called. +/// To initialize types or operations and add them to the map, the getter functions 'commitCustomType' and +/// 'commitCustomOperation' should be used. This can for example be done e.g. in the specialization of the MPITrait of +/// the newly defined type. For an example see MPIWrapper.cpp + std::map< std::type_index, walberla::mpi::Datatype > customMPITypes_{}; + std::map< walberla::mpi::Operation, walberla::mpi::MPIOperation > customMPIOperations_{}; + // FIXME this must be type specific as well, but doing so is a bit more complicated. + // 1. Idea) Combining both maps together e.g. 
as std::map< typeid(CType), + // std::pair< MPI_DataType, + // std::map< Operation, + // MPI_Op > > > customMPITypesWithOps_{}; + // There the access is quite nasty to look at, but easily doable, the construction however is quite difficult + // also one needs to make sure that the type is initialized before the operation. + // 2. Idea) Leaving it as two maps customMPITypes_ and customMPIOperations, + // but storing a pair of typeid and operation as key for the operation map. + // This way everything would look nice, but someone needs to implement a comparison operator for this pair. + // I personally don't know where to put this comparison operator to, since it should not be part of the manager. + // 3. Idea) Since this relies on the use of MPITrait<CType> --> { MPI_Datatype, MPI_Op } someone could define a object + // to store in the MPIManager there, to keep the MPIManager light and easily understandable. + // I'm also not sure if the MPITrait is the right spot for this though. + // For more information about the changes done in the code to allow custom defined types and operations, + // check out MR !647 ( https://i10git.cs.fau.de/walberla/walberla/-/merge_requests/647 ) + + + }; // class MPIManager diff --git a/src/core/mpi/MPIOperation.h b/src/core/mpi/MPIOperation.h new file mode 100644 index 0000000000000000000000000000000000000000..3da98bfe8ea2bdb8783b02e997cdee486bb46a0d --- /dev/null +++ b/src/core/mpi/MPIOperation.h @@ -0,0 +1,64 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. 
+// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file MPIOperation.h +//! \ingroup core +//! \author Michael Zikeli <michael.zikeli@fau.de> +// +//====================================================================================================================== + +#pragma once + +#include "core/Abort.h" +#include "core/mpi/MPIWrapper.h" + +namespace walberla::mpi{ + +//******************************************************************************************************************* +/*! RAII class for MPI operators that commits and frees them +* +*/ +//******************************************************************************************************************* +class MPIOperation +{ + public: + MPIOperation() = delete; + + explicit MPIOperation( MPI_User_function* fct ) : mpiOperation_( MPI_OP_NULL ) + { + WALBERLA_MPI_SECTION() { + if ( MPI_Op_create( fct, true, &mpiOperation_) != MPI_SUCCESS ) + { + WALBERLA_ABORT("MPI_Op_create for " << typeid(mpiOperation_).name() << " failed." 
); + } + } // WALBERLA_MPI_SECTIION + } + + ~MPIOperation() + { + WALBERLA_MPI_SECTION() { MPI_Op_free( & mpiOperation_ ); } + } + + operator MPI_Op() const + { + return mpiOperation_; + } + + protected: + MPI_Op mpiOperation_; +}; + + +} // namespace walberla::mpi \ No newline at end of file diff --git a/src/core/mpi/MPIWrapper.cpp b/src/core/mpi/MPIWrapper.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9d68668676c932de2c564dd37312ca3645104e45 --- /dev/null +++ b/src/core/mpi/MPIWrapper.cpp @@ -0,0 +1,142 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file MPIWrapper.cpp +//! \ingroup core +//! \author Michael Zikeli <michael.zikeli@fau.de> +// +//====================================================================================================================== + +#include "MPIWrapper.h" + +#include <set> + +#include "MPIManager.h" + +#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT +namespace walberla +{ + +namespace mpi +{ +namespace +{ +/// These functions than can be used by self defined mpi operations, e.g. by using CustomMPIOperation. 
+using float16_t = walberla::float16; +// The signature of MPI_User_function looks like this +// typedef void MPI_User_function( void *invec, void *inoutvec, int *len, MPI_Datatype *datatype); + +void sum(void* mpiRHSArray, void* mpiLHSArray, int* len, MPI_Datatype*) +{ + // cast mpi type to target c++ type + auto* rhs = (float16_t*) mpiRHSArray; + auto* lhs = (float16_t*) mpiLHSArray; + for (int i = 0; i < *len; ++i) + { + *lhs += *rhs; + } +} + +void min(void* mpiRHSArray, void* mpiLHSArray, int* len, MPI_Datatype*) +{ + // cast mpi type to target c++ type + auto* rhs = (float16_t*) mpiRHSArray; + auto* lhs = (float16_t*) mpiLHSArray; + for (int i = 0; i < *len; ++i) + { + *lhs = (*rhs >= *lhs) ? *lhs : *rhs; + } +} + +void max(void* mpiRHSArray, void* mpiLHSArray, int* len, MPI_Datatype*) +{ + // cast mpi type to target c++ type + auto* rhs = (float16_t*) mpiRHSArray; + auto* lhs = (float16_t*) mpiLHSArray; + for (int i = 0; i < *len; ++i) + { + *lhs = (*rhs <= *lhs) ? *lhs : *rhs; + } +} + +MPI_User_function* returnMPIUserFctPointer(const Operation op) +{ + switch (op) + { + case SUM: + return ∑ + case MIN: + return &min; + case MAX: + return &max; + default: + WALBERLA_ABORT("The chosen operation " << typeid(op).name() << " is not implemented for float16 yet."); + } +} + +} +} + +/// Here some MPI_Datatypes and MPI_Ops are initialized that are not part of the MPI Standard and therefore have to be +/// define yourself. This is done in the MPIManager, since they need to be freed before MPIFinalize is called and this +/// way it is easiest to keep track of them. +/// For more information about this feature compare MR !647 ( +/// https://i10git.cs.fau.de/walberla/walberla/-/merge_requests/647 ) + +/*! + * \brief Specialization of MPITrait for float16 + * + * The initialization of the self defined MPI_Datatype and MPI_Op is done in the MPIManager so that it can be freed + * before MPI is finalized. 
+ */ +MPI_Datatype MPITrait< walberla::float16 >::type() +{ + +#ifdef WALBERLA_BUILD_WITH_MPI + // Since this type should be created only once, a static variable is used as safeguard. + static bool initializedType = false; + if (!initializedType) + { + // Since float16 consists of two Bytes, a continuous datatype with size of two byte is created. + mpi::MPIManager::instance()->commitCustomType< walberla::float16, const int >(2); + initializedType = true; + } + return mpi::MPIManager::instance()->getCustomType< walberla::float16 >(); +#else + return mpistubs::MPI_FLOAT16; +#endif +} + +MPI_Op MPITrait< walberla::float16 >::operation(const mpi::Operation& op) +{ + WALBERLA_MPI_SECTION() + { + // mpi::Operation is an enum and not an enum class, thus, it is not sufficient to make a just a bool variable as + // safeguard, since all operations are of type mpi::Operation and only the first one would pass the safeguard. + // Therefore, a set is created and each operation that is called the first time, will be initialized. 
+ static std::set< mpi::Operation > operationInitializationRegister; + const bool needsInitialization = std::get< 1 >(operationInitializationRegister.emplace(op)); + if (needsInitialization) + { + mpi::MPIManager::instance()->commitCustomOperation< walberla::float16 >( + op, mpi::returnMPIUserFctPointer(op)); + } + return MPIManager::instance()->getCustomOperation< walberla::float16 >(op); + } + WALBERLA_NON_MPI_SECTION() { WALBERLA_ABORT("If MPI is not used, a custom operator should never be called."); } +} + +} // namespace walberla +#endif // WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT diff --git a/src/core/mpi/MPIWrapper.h b/src/core/mpi/MPIWrapper.h index eecee3136c702a61e7987d25705076a7c780a635..baf0c62d8ba65bd7bd91a665d5d5261e8323d778 100644 --- a/src/core/mpi/MPIWrapper.h +++ b/src/core/mpi/MPIWrapper.h @@ -23,8 +23,7 @@ #pragma once #include "core/Abort.h" - - +#include "core/mpi/Operation.h" /// \cond internal @@ -47,6 +46,7 @@ #endif + namespace walberla { namespace mpistubs { //empty namespace which can be used @@ -77,8 +77,6 @@ namespace mpistubs { #pragma warning ( pop ) #endif - - #else // WALBERLA_BUILD_WITH_MPI @@ -143,6 +141,10 @@ const MPI_Datatype MPI_UNSIGNED_LONG_LONG = 10; const MPI_Datatype MPI_FLOAT = 11; const MPI_Datatype MPI_DOUBLE = 12; const MPI_Datatype MPI_LONG_DOUBLE = 13; +#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT + const MPI_Datatype MPI_FLOAT16 = 14; +#endif + const int MPI_ORDER_C = 0; const int MPI_ORDER_FORTRAN = 0; @@ -151,16 +153,17 @@ const MPI_Datatype MPI_ANY_SOURCE = -2; const MPI_Datatype MPI_ANY_TAG = -1; const MPI_Datatype MPI_DATATYPE_NULL = 0; -const MPI_Op MPI_MIN = 100; -const MPI_Op MPI_MAX = 101; -const MPI_Op MPI_SUM = 102; -const MPI_Op MPI_PROD = 103; -const MPI_Op MPI_LAND = 104; -const MPI_Op MPI_BAND = 105; -const MPI_Op MPI_LOR = 106; -const MPI_Op MPI_BOR = 107; -const MPI_Op MPI_LXOR = 108; -const MPI_Op MPI_BXOR = 109; +const MPI_Op MPI_OP_NULL = 99; +const MPI_Op MPI_MIN = 100; +const MPI_Op 
MPI_MAX = 101; +const MPI_Op MPI_SUM = 102; +const MPI_Op MPI_PROD = 103; +const MPI_Op MPI_LAND = 104; +const MPI_Op MPI_BAND = 105; +const MPI_Op MPI_LOR = 106; +const MPI_Op MPI_BOR = 107; +const MPI_Op MPI_LXOR = 108; +const MPI_Op MPI_BXOR = 109; const int MPI_PACKED = 1; const int MPI_UNDEFINED = -1; @@ -265,6 +268,7 @@ inline int MPI_Type_get_extent(MPI_Datatype, MPI_Aint*, MPI_Aint*) { WALBERLA_MP inline int MPI_Type_create_struct(int, const int[], const MPI_Aint[], const MPI_Datatype[], MPI_Datatype*) { WALBERLA_MPI_FUNCTION_ERROR } inline int MPI_Op_create(MPI_User_function*, int, MPI_Op*) { WALBERLA_MPI_FUNCTION_ERROR } +inline int MPI_Op_free(MPI_Op*) { WALBERLA_MPI_FUNCTION_ERROR } inline int MPI_Get_processor_name( char*, int* ) { WALBERLA_MPI_FUNCTION_ERROR } @@ -307,58 +311,104 @@ namespace mpi { + + /*!\class MPITrait // \brief Base template for the MPITrait class // // The MPITrait class template offers a translation between the C++ built-in data types and -// the corresponding MPI data types required for send and receive operations. For a particular -// MPITrait instantiation, the corresponding MPI data type can be obtained by calling type() -// of the MPITrait. The following example demonstrates the application of the MPITrait class: +// the corresponding MPI data types its respective operation required for send, receive and reduce operations. +// For a particular MPITrait instantiation, the corresponding MPI data type can be obtained by calling type() +// as well as calling operation( const Operation& ) to the MPI operation corresponding to the MPI data type. +// The following example demonstrates the application of the MPITrait class: - \code +\code // Initialization of the MPI communication int* pi; // Integer buffer for the MPI send operation - ... // Initialization of the send buffer +... 
// Initialization of the send buffer // Sending 50 integers to process 0 MPI_Send( pi, 50, MPITrait< int >::type(), 0, 0, MPI_COMM_WORLD ); - \endcode -*/ +\endcode + */ template< typename T > -struct MPITrait; - - - -/// Macro for the creation of MPITrait specializations for the supported data types. +struct MPITrait +{ + static inline MPI_Datatype type(); + static inline MPI_Op operation(const mpi::Operation& ); +}; -#define WALBERLA_CREATE_MPITRAIT_SPECIALIZATION(CPP_TYPE,MPI_TYPE) \ +/// Macro for specialization of the MPI supported data types in MPITrait::type(). +#define WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(CPP_TYPE, MPI_TYPE) \ template<> \ - struct MPITrait< CPP_TYPE > \ + inline MPI_Datatype MPITrait< CPP_TYPE >::type() \ { \ - static inline MPI_Datatype type() { return (MPI_TYPE); } \ + return (MPI_TYPE); \ } - - // MPITRAIT SPECIALIZATIONS -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( char , MPI_CHAR ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( signed char , MPI_CHAR ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( signed short int , MPI_SHORT ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( signed int , MPI_INT ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( signed long int , MPI_LONG ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( signed long long , MPI_LONG_LONG ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( unsigned char , MPI_UNSIGNED_CHAR ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( unsigned short int , MPI_UNSIGNED_SHORT ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( unsigned int , MPI_UNSIGNED ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( unsigned long int , MPI_UNSIGNED_LONG ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( unsigned long long , MPI_UNSIGNED_LONG_LONG ); +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(char, MPI_CHAR) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(signed char, MPI_CHAR) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(signed short int, MPI_SHORT) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(signed int, MPI_INT) 
+WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(signed long int, MPI_LONG) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(signed long long, MPI_LONG_LONG) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(unsigned char, MPI_UNSIGNED_CHAR) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(unsigned short int, MPI_UNSIGNED_SHORT) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(unsigned int, MPI_UNSIGNED) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(unsigned long int, MPI_UNSIGNED_LONG) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(unsigned long long, MPI_UNSIGNED_LONG_LONG) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(float, MPI_FLOAT) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(double, MPI_DOUBLE) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(long double, MPI_LONG_DOUBLE) #ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT - WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( walberla::float16 , MPI_WCHAR ); +template<> struct MPITrait< float16 >{ + static MPI_Datatype type(); + static MPI_Op operation(const mpi::Operation& ); +}; +#endif + + +/*! + * \brief Specialization of the static operation() method of MPITrait. + * + * It chooses a MPI_Op depending on the value type of the object the operation is performed on. + * + * \param op The operation to be performed (op is an element of the enum mpi::Operation). 
+ */ +template< typename T > +MPI_Op MPITrait< T >::operation(const mpi::Operation& op) +{ + switch (op) + { + case mpi::MIN: + return MPI_MIN; + case mpi::MAX: + return MPI_MAX; + case mpi::SUM: + return MPI_SUM; + case mpi::PRODUCT: + return MPI_PROD; + case mpi::LOGICAL_AND: + return MPI_LAND; + case mpi::BITWISE_AND: + return MPI_BAND; + case mpi::LOGICAL_OR: + return MPI_LOR; + case mpi::BITWISE_OR: + return MPI_BOR; + case mpi::LOGICAL_XOR: + return MPI_LXOR; + case mpi::BITWISE_XOR: + return MPI_BXOR; + default: + WALBERLA_ABORT("Unknown operation!"); + } +#ifdef __IBMCPP__ + return MPI_SUM; // never reached, helps to suppress a warning from the IBM compiler #endif -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( float , MPI_FLOAT ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( double , MPI_DOUBLE ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( long double , MPI_LONG_DOUBLE ); +} } // namespace walberla /// \endcond diff --git a/src/core/mpi/Operation.h b/src/core/mpi/Operation.h new file mode 100644 index 0000000000000000000000000000000000000000..f3097bb89fe4f3fcb3f4581d8435afe2b2b70ec8 --- /dev/null +++ b/src/core/mpi/Operation.h @@ -0,0 +1,27 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! 
\file Operation.h +//! \ingroup core +//! \author Michael Zikeli <michael.zikeli@fau.de> +// +//====================================================================================================================== +#pragma once + +namespace walberla::mpi +{ +// Note: I don't like at all that this is an enum and not an enum class, but changing this would be a major change in the framework. +enum Operation { MIN, MAX, SUM, PRODUCT, LOGICAL_AND, BITWISE_AND, LOGICAL_OR, BITWISE_OR, LOGICAL_XOR, BITWISE_XOR }; +} // namespace walberla::mpi \ No newline at end of file diff --git a/src/core/mpi/Reduce.h b/src/core/mpi/Reduce.h index 5e9bb8220112ff4bff9c19a02e66cb3c2d801d46..a0b6edb39cad16fdfa5c527f83f9fe007ba9fa2c 100644 --- a/src/core/mpi/Reduce.h +++ b/src/core/mpi/Reduce.h @@ -16,18 +16,19 @@ //! \file Reduce.h //! \ingroup core //! \author Christian Godenschwager <christian.godenschwager@fau.de> +//! \author Michael Zikeli <michael.zikeli@fau.de> // //====================================================================================================================== #pragma once -#include "BufferDataTypeExtensions.h" - #include "core/Abort.h" #include "core/DataTypes.h" #include "core/debug/Debug.h" -#include "core/mpi/MPIManager.h" #include "core/mpi/MPIWrapper.h" +#include "core/mpi/Operation.h" + +#include "BufferDataTypeExtensions.h" #include "core/math/Vector3.h" @@ -36,33 +37,10 @@ namespace walberla { -namespace mpi { - -enum Operation { MIN, MAX, SUM, PRODUCT, LOGICAL_AND, BITWISE_AND, LOGICAL_OR, BITWISE_OR, LOGICAL_XOR, BITWISE_XOR }; - -inline MPI_Op toMPI_Op( Operation operation ) +namespace mpi { - switch( operation ) - { - case MIN: return MPI_MIN; - case MAX: return MPI_MAX; - case SUM: return MPI_SUM; - case PRODUCT: return MPI_PROD; - case LOGICAL_AND: return MPI_LAND; - case BITWISE_AND: return MPI_BAND; - case LOGICAL_OR: return MPI_LOR; - case BITWISE_OR: return MPI_BOR; - case LOGICAL_XOR: return MPI_LXOR; - case BITWISE_XOR: return MPI_BXOR; 
- default: WALBERLA_ABORT( "Unknown operation!" ); - } -#ifdef __IBMCPP__ - return MPI_SUM; // never reached, helps to suppress a warning from the IBM compiler -#endif -} - //====================================================================================================================== /*! * \brief Reduces a value over all processes in-place @@ -91,11 +69,11 @@ void reduceInplace( T & value, Operation operation, int recvRank = 0, MPI_Comm c if( myRank == recvRank ) { - MPI_Reduce( MPI_IN_PLACE, &value, 1, MPITrait<T>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( MPI_IN_PLACE, &value, 1, MPITrait<T>::type(), MPITrait<T>::operation(operation), recvRank, comm ); } else { - MPI_Reduce( &value, nullptr, 1, MPITrait<T>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( &value, nullptr, 1, MPITrait<T>::type(), MPITrait<T>::operation(operation), recvRank, comm ); } } @@ -128,11 +106,11 @@ inline void reduceInplace( bool & value, Operation operation, int recvRank = 0, if( myRank == recvRank ) { - MPI_Reduce( MPI_IN_PLACE, &intValue, 1, MPITrait<int>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( MPI_IN_PLACE, &intValue, 1, MPITrait<int>::type(), MPITrait<int>::operation(operation), recvRank, comm ); } else { - MPI_Reduce( &intValue, nullptr, 1, MPITrait<int>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( &intValue, nullptr, 1, MPITrait<int>::type(), MPITrait<int>::operation(operation), recvRank, comm ); } value = intValue != 0; @@ -172,11 +150,11 @@ T reduce( const T value, Operation operation, int recvRank = 0, MPI_Comm comm = if( myRank == recvRank ) { - MPI_Reduce( const_cast<T*>( &value ), &result, 1, MPITrait<T>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( const_cast<T*>( &value ), &result, 1, MPITrait<T>::type(), MPITrait<T>::operation(operation), recvRank, comm ); } else { - MPI_Reduce( const_cast<T*>( &value ), nullptr, 1, MPITrait<T>::type(), toMPI_Op(operation), recvRank, comm ); + 
MPI_Reduce( const_cast<T*>( &value ), nullptr, 1, MPITrait<T>::type(), MPITrait<T>::operation(operation), recvRank, comm ); } return result; @@ -213,11 +191,11 @@ inline bool reduce( const bool value, Operation operation, int recvRank = 0, MPI if( myRank == recvRank ) { - MPI_Reduce( &intValue, &intResult, 1, MPITrait<int>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( &intValue, &intResult, 1, MPITrait<int>::type(), MPITrait<int>::operation(operation), recvRank, comm ); } else { - MPI_Reduce( &intValue, nullptr, 1, MPITrait<int>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( &intValue, nullptr, 1, MPITrait<int>::type(), MPITrait<int>::operation(operation), recvRank, comm ); } return intResult != 0; @@ -252,11 +230,11 @@ void reduceInplace( std::vector<T> & values, Operation operation, int recvRank = if( myRank == recvRank ) { - MPI_Reduce( MPI_IN_PLACE, values.empty() ? nullptr : &values[0], int_c( values.size() ), MPITrait<T>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( MPI_IN_PLACE, values.empty() ? nullptr : &values[0], int_c( values.size() ), MPITrait<T>::type(), MPITrait<T>::operation(operation), recvRank, comm ); } else { - MPI_Reduce( values.empty() ? nullptr : &values[0], nullptr, int_c( values.size() ), MPITrait<T>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( values.empty() ? nullptr : &values[0], nullptr, int_c( values.size() ), MPITrait<T>::type(), MPITrait<T>::operation(operation), recvRank, comm ); } } @@ -292,14 +270,14 @@ inline void reduceInplace( std::vector<bool> & values, Operation operation, int if( myRank == recvRank ) { - MPI_Reduce( MPI_IN_PLACE, sendBuffer.empty() ? nullptr : &sendBuffer[0], int_c( sendBuffer.size() ), MPITrait<uint8_t>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( MPI_IN_PLACE, sendBuffer.empty() ? 
nullptr : &sendBuffer[0], int_c( sendBuffer.size() ), MPITrait<uint8_t>::type(), MPITrait<uint8_t>::operation(operation), recvRank, comm ); size_t size = values.size(); convert( sendBuffer, values ); values.resize(size); } else { - MPI_Reduce( sendBuffer.empty() ? nullptr : &sendBuffer[0], nullptr, int_c( sendBuffer.size() ), MPITrait<uint8_t>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( sendBuffer.empty() ? nullptr : &sendBuffer[0], nullptr, int_c( sendBuffer.size() ), MPITrait<uint8_t>::type(), MPITrait<uint8_t>::operation(operation), recvRank, comm ); } } @@ -331,11 +309,11 @@ void reduceInplace( math::Vector3<T> & values, Operation operation, int recvRank if( myRank == recvRank ) { - MPI_Reduce( MPI_IN_PLACE, values.data(), 3, MPITrait<T>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( MPI_IN_PLACE, values.data(), 3, MPITrait<T>::type(), MPITrait<T>::operation(operation), recvRank, comm ); } else { - MPI_Reduce( values.data(), nullptr, 3, MPITrait<T>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( values.data(), nullptr, 3, MPITrait<T>::type(), MPITrait<T>::operation(operation), recvRank, comm ); } } @@ -367,11 +345,11 @@ inline void reduceInplace( math::Vector3<bool> & values, Operation operation, in if( myRank == recvRank ) { - MPI_Reduce( MPI_IN_PLACE, intValues.data(), 3, MPITrait<int>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( MPI_IN_PLACE, intValues.data(), 3, MPITrait<int>::type(), MPITrait<int>::operation(operation), recvRank, comm ); } else { - MPI_Reduce( intValues.data(), nullptr, 3, MPITrait<int>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( intValues.data(), nullptr, 3, MPITrait<int>::type(), MPITrait<int>::operation(operation), recvRank, comm ); } for(uint_t i = 0; i < 3; ++i) @@ -411,11 +389,11 @@ math::Vector3<T> reduce( const math::Vector3<T> & values, Operation operation, i if( myRank == recvRank ) { - MPI_Reduce( const_cast<T*>( values.data() ), result.data(), 3, 
MPITrait<T>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( const_cast<T*>( values.data() ), result.data(), 3, MPITrait<T>::type(), MPITrait<T>::operation(operation), recvRank, comm ); } else { - MPI_Reduce( const_cast<T*>( values.data() ), nullptr, 3, MPITrait<T>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( const_cast<T*>( values.data() ), nullptr, 3, MPITrait<T>::type(), MPITrait<T>::operation(operation), recvRank, comm ); } return result; @@ -452,14 +430,14 @@ inline math::Vector3<bool> reduce( const math::Vector3<bool> & values, Operation if( myRank == recvRank ) { - MPI_Reduce( MPI_IN_PLACE, intValues.data(), 3, MPITrait<int>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( MPI_IN_PLACE, intValues.data(), 3, MPITrait<int>::type(), MPITrait<int>::operation(operation), recvRank, comm ); for(uint_t i = 0; i < 3; ++i) results[i] = intValues[i] != 0; } else { - MPI_Reduce( intValues.data(), nullptr, 3, MPITrait<int>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( intValues.data(), nullptr, 3, MPITrait<int>::type(), MPITrait<int>::operation(operation), recvRank, comm ); } return results; @@ -487,7 +465,7 @@ T allReduce( const T & value, Operation operation, MPI_Comm comm = MPI_COMM_WORL WALBERLA_NON_MPI_SECTION() { return value; } T result; - MPI_Allreduce( const_cast<T*>( &value ), &result, 1, MPITrait<T>::type(), toMPI_Op(operation), comm ); + MPI_Allreduce( const_cast<T*>( &value ), &result, 1, MPITrait<T>::type(), MPITrait<T>::operation(operation), comm ); return result; } @@ -514,7 +492,7 @@ inline bool allReduce( const bool value, Operation operation, MPI_Comm comm = MP int intValue = value ? 
1 : 0; - MPI_Allreduce( MPI_IN_PLACE, &intValue, 1, MPITrait<int>::type(), toMPI_Op(operation), comm ); + MPI_Allreduce( MPI_IN_PLACE, &intValue, 1, MPITrait<int>::type(), MPITrait<int>::operation(operation), comm ); return intValue != 0; } @@ -539,7 +517,7 @@ void allReduceInplace( T & value, Operation operation, MPI_Comm comm = MPI_COMM_ WALBERLA_NON_MPI_SECTION() { return; } - MPI_Allreduce( MPI_IN_PLACE, &value, 1, MPITrait<T>::type(), toMPI_Op(operation), comm ); + MPI_Allreduce( MPI_IN_PLACE, &value, 1, MPITrait<T>::type(), MPITrait<T>::operation(operation), comm ); } @@ -562,7 +540,7 @@ inline void allReduceInplace( bool & value, Operation operation, MPI_Comm comm = int intValue = value ? 1 : 0; - MPI_Allreduce( MPI_IN_PLACE, &intValue, 1, MPITrait<int>::type(), toMPI_Op(operation), comm ); + MPI_Allreduce( MPI_IN_PLACE, &intValue, 1, MPITrait<int>::type(), MPITrait<int>::operation(operation), comm ); value = intValue != 0; } @@ -587,7 +565,7 @@ void allReduceInplace( std::vector<T> & values, Operation operation, MPI_Comm co WALBERLA_NON_MPI_SECTION() { return; } - MPI_Allreduce( MPI_IN_PLACE, values.empty() ? nullptr : &values[0], int_c( values.size() ), MPITrait<T>::type(), toMPI_Op(operation), comm ); + MPI_Allreduce( MPI_IN_PLACE, values.empty() ? nullptr : &values[0], int_c( values.size() ), MPITrait<T>::type(), MPITrait<T>::operation(operation), comm ); } @@ -612,7 +590,7 @@ inline void allReduceInplace( std::vector<bool> & bools, Operation operation, MP std::vector<uint8_t> sendBuffer; convert( bools, sendBuffer ); - MPI_Allreduce( MPI_IN_PLACE, sendBuffer.empty() ? nullptr : &sendBuffer[0], int_c( sendBuffer.size() ), MPITrait<uint8_t>::type(), toMPI_Op(operation), comm ); + MPI_Allreduce( MPI_IN_PLACE, sendBuffer.empty() ? 
nullptr : &sendBuffer[0], int_c( sendBuffer.size() ), MPITrait<uint8_t>::type(), MPITrait<uint8_t>::operation(operation), comm ); auto size = bools.size(); convert(sendBuffer, bools); bools.resize(size); @@ -637,7 +615,7 @@ void allReduceInplace( math::Vector3<T> & values, Operation operation, MPI_Comm WALBERLA_NON_MPI_SECTION() { return; } - MPI_Allreduce( MPI_IN_PLACE, values.data(), 3, MPITrait<T>::type(), toMPI_Op(operation), comm ); + MPI_Allreduce( MPI_IN_PLACE, values.data(), 3, MPITrait<T>::type(), MPITrait<T>::operation(operation), comm ); } @@ -663,7 +641,7 @@ inline void allReduceInplace( math::Vector3<bool> & bools, Operation operation, math::Vector3<int> intValues{bools[0] ? 1 : 0, bools[1] ? 1 : 0, bools[2] ? 1 : 0}; - MPI_Allreduce( MPI_IN_PLACE, intValues.data(), 3, MPITrait<int>::type(), toMPI_Op(operation), comm ); + MPI_Allreduce( MPI_IN_PLACE, intValues.data(), 3, MPITrait<int>::type(), MPITrait<int>::operation(operation), comm ); for(uint_t i = 0; i < 3; ++i) { diff --git a/src/core/timing/Timer.h b/src/core/timing/Timer.h index 9f7c3f97d1066ff7ffa322cb5d6550c9f5d5013b..efc7016409a6e91d8c83b96aebfbd9fc40c4a8cf 100644 --- a/src/core/timing/Timer.h +++ b/src/core/timing/Timer.h @@ -30,6 +30,7 @@ #include "WcPolicy.h" #include "core/DataTypes.h" +#include "core/mpi/MPIManager.h" #include "core/mpi/RecvBuffer.h" #include "core/mpi/Reduce.h" #include "core/mpi/SendBuffer.h" @@ -527,7 +528,7 @@ shared_ptr<Timer<TP> > getReduced( Timer<TP>& timer, ReduceType rt, int targetRa } //uint_t counter, double min, double max, double total, double sumOfSquares - if ( targetRank < 0 || targetRank == MPIManager::instance()->worldRank() ) + if ( targetRank < 0 || targetRank == mpi::MPIManager::instance()->worldRank() ) return make_shared<Timer<TP> >( mpi::MPIManager::instance()->numProcesses(), min, max, total, sumOfSquares ); return nullptr; diff --git a/src/domain_decomposition/BlockStorage.h b/src/domain_decomposition/BlockStorage.h index 
b59f7b30fe803a2c9abbe48c8856c9b4bbf2be20..2f29acdb7fd45c1e287ebf6d4601b641f303506b 100644 --- a/src/domain_decomposition/BlockStorage.h +++ b/src/domain_decomposition/BlockStorage.h @@ -723,6 +723,12 @@ inline void BlockStorage::clearBlockData( const BlockDataID & id ) { for( auto block = begin(); block != end(); ++block ) block->deleteData( id ); + + //also delete block data from data handling vector + auto elementToErase = std::remove_if(blockDataItem_.begin(), blockDataItem_.end(), + [id](const internal::BlockDataItem& dataItem) + { return dataItem.getId() == id; }); + blockDataItem_.erase(elementToErase, blockDataItem_.end()); } diff --git a/src/field/distributors/KernelDistributor.h b/src/field/distributors/KernelDistributor.h index 712d9a7b7e32ae96b0d259873d102166675b22b3..bc0abb8fa5882e51960d936132cd3613d55f5172 100644 --- a/src/field/distributors/KernelDistributor.h +++ b/src/field/distributors/KernelDistributor.h @@ -62,7 +62,7 @@ public: WALBERLA_ASSERT(baseField.nrOfGhostLayers() > uint_t(0), "field for kernel distribution needs at least one ghost layer"); } - inline bool operator==( const OwnType & other ){ return baseField_ == other.baseField_; } + inline bool operator==( const OwnType & other ) const { return baseField_ == other.baseField_; } template< typename ForwardIterator_T > inline void distribute( const Vector3<real_t> & position, ForwardIterator_T distributeValueBegin ) diff --git a/src/field/distributors/NearestNeighborDistributor.h b/src/field/distributors/NearestNeighborDistributor.h index 932f443b3e30af9ab6a2bc4f6c6f3f585e485473..c4819cb9119d46e46dbc34bed40228664bb5cffb 100644 --- a/src/field/distributors/NearestNeighborDistributor.h +++ b/src/field/distributors/NearestNeighborDistributor.h @@ -59,7 +59,7 @@ public: : blockStorage_( blockStorage ), block_( block ), baseField_( baseField ), flagField_( flagField ), evaluationMask_( evaluationMask ) {} - inline bool operator==( const OwnType & other ){ return baseField_ == other.baseField_; 
} + inline bool operator==( const OwnType & other ) const { return baseField_ == other.baseField_; } template< typename ForwardIterator_T > inline void distribute( const Vector3<real_t> & position, ForwardIterator_T distributeValueBegin ) diff --git a/src/field/interpolators/KernelFieldInterpolator.h b/src/field/interpolators/KernelFieldInterpolator.h index 0e59fabf21dfd4899e62ca29961811a0e99e59d9..0f5987e76e844df9505869fbaab3feadacbc88fa 100644 --- a/src/field/interpolators/KernelFieldInterpolator.h +++ b/src/field/interpolators/KernelFieldInterpolator.h @@ -105,7 +105,7 @@ public: } - inline bool operator==( const OwnType & other ){ return baseField_ == other.baseField_; } + inline bool operator==( const OwnType & other ) const { return baseField_ == other.baseField_; } template< typename ForwardIterator_T > inline void get( const Vector3<real_t> & position, ForwardIterator_T interpolationResultBegin ) diff --git a/src/field/interpolators/NearestNeighborFieldInterpolator.h b/src/field/interpolators/NearestNeighborFieldInterpolator.h index b5b5cba7f65a3e06517356933d157108ba81e90c..bb08276f9f12949a10461e96b1a815acd64f2559 100644 --- a/src/field/interpolators/NearestNeighborFieldInterpolator.h +++ b/src/field/interpolators/NearestNeighborFieldInterpolator.h @@ -57,7 +57,7 @@ public: {} - inline bool operator==( const OwnType & other ){ return baseField_ == other.baseField_; } + inline bool operator==( const OwnType & other ) const { return baseField_ == other.baseField_; } template< typename ForwardIterator_T > inline void get( const Vector3<real_t> & position, ForwardIterator_T interpolationResultBegin ) diff --git a/src/field/interpolators/TrilinearFieldInterpolator.h b/src/field/interpolators/TrilinearFieldInterpolator.h index e9809d835f1bf67e89da8a0eb2b5a9493624c84e..351ed7afea09db1c75feeb59dd771afb0796949e 100644 --- a/src/field/interpolators/TrilinearFieldInterpolator.h +++ b/src/field/interpolators/TrilinearFieldInterpolator.h @@ -62,7 +62,7 @@ public: } - 
inline bool operator==( const OwnType & other ){ return baseField_ == other.baseField_; } + inline bool operator==( const OwnType & other ) const { return baseField_ == other.baseField_; } template< typename ForwardIterator_T > inline void get( const Vector3<real_t> & position, ForwardIterator_T interpolationResultBegin ) diff --git a/src/gpu/FieldAccessor.h b/src/gpu/FieldAccessor.h index cd50cc58d6e1c6ef708a1cc50e7fbcc897933281..fc1214e081c0822fdc1370ad9e69f44c2e986594 100644 --- a/src/gpu/FieldAccessor.h +++ b/src/gpu/FieldAccessor.h @@ -78,7 +78,7 @@ namespace gpu __device__ __forceinline__ bool isValidPosition() { return true; } __device__ T & get() { return * (T*)(ptr_); } - __device__ T & get( int f) { return * (T*)(ptr_ + f * fOffset_); } + __device__ T & get( uint_t f) { return * (T*)(ptr_ + f * fOffset_); } __device__ T & getNeighbor( int cx, int cy, int cz ) const @@ -88,7 +88,7 @@ namespace gpu cz * zOffset_ ); } - __device__ T & getNeighbor( int cx, int cy, int cz, int cf ) + __device__ T & getNeighbor( int cx, int cy, int cz, uint_t cf ) { return * (T*)( ptr_ + cx * xOffset_ + cy * yOffset_ + diff --git a/src/lbm_generated/refinement/BasicRecursiveTimeStep.impl.h b/src/lbm_generated/refinement/BasicRecursiveTimeStep.impl.h index 63d8da5b4510e09ed10ebbfb73e0d761f4ae407d..e074e9737172f56047c18eae864f9d775985b102 100644 --- a/src/lbm_generated/refinement/BasicRecursiveTimeStep.impl.h +++ b/src/lbm_generated/refinement/BasicRecursiveTimeStep.impl.h @@ -195,8 +195,7 @@ std::function<void()> BasicRecursiveTimeStep< PdfField_T, SweepCollection_T, Bo template< typename PdfField_T, typename SweepCollection_T, typename BoundaryCollection_T > -void BasicRecursiveTimeStep< PdfField_T, SweepCollection_T, BoundaryCollection_T >::ghostLayerPropagation( - Block * block) +void BasicRecursiveTimeStep< PdfField_T, SweepCollection_T, BoundaryCollection_T >::ghostLayerPropagation(Block * block) { auto pdfField = block->getData<PdfField_T>(pdfFieldId_); diff --git 
a/src/vtk/VTKOutput.cpp b/src/vtk/VTKOutput.cpp index 61df10854dfd150eeca85b070c8caac5c1d89318..24f2c0e26bfc3284b0dd6932fe27ed062fd1be41 100644 --- a/src/vtk/VTKOutput.cpp +++ b/src/vtk/VTKOutput.cpp @@ -1759,17 +1759,15 @@ void VTKOutput::writeCollectors( const bool barrier ) WALBERLA_MPI_WORLD_BARRIER(); WALBERLA_NON_ROOT_SECTION() { return; } - WALBERLA_ASSERT_EQUAL( MPIManager::instance()->worldRank(), 0 ); - + if(!amrFileFormat_) + writePVD(); for( auto collector = collectorsToWrite_.begin(); collector != collectorsToWrite_.end(); ++collector ) { if( uniformGrid_ ) { - writePVD(); - if( samplingDx_ <= real_c(0) || samplingDy_ <= real_c(0) || samplingDz_ <= real_c(0) ) writePVTI( *collector ); else @@ -1782,7 +1780,6 @@ void VTKOutput::writeCollectors( const bool barrier ) } else { - writePVD(); writePVTU( *collector ); // also applies for outputDomainDecomposition_ == true and pointDataSource_ != NULL // and polylineDataSource_ != NULL (uniformGrid_ will be false) } @@ -1987,15 +1984,6 @@ void VTKOutput::writeVTHB( const uint_t collector ) const ofs << " <Block level=\"" << level << "\">\n"; uint index = 0; for( auto file = files[level].begin(); file != files[level].end(); ++file ){ - std::ifstream ifs( file->string().c_str() ); - - std::string piece; - for( uint_t i = 0; i != 4; ++i ) - std::getline( ifs, piece ); - ifs.close(); - - piece.erase( piece.length()-1, 1 ); - ofs << " <DataSet index=\"" << index << "\" file=\"" << executionFolder_ << "_" << collector << "/" << file->filename().string() << "\"/>\n"; index++; } diff --git a/tests/core/CMakeLists.txt b/tests/core/CMakeLists.txt index 8d3f0298ac3bf387dfb19d18b46bfaff8caa6912..604de6371b573b350638ea4fb4f003d93960cb7c 100644 --- a/tests/core/CMakeLists.txt +++ b/tests/core/CMakeLists.txt @@ -119,6 +119,11 @@ waLBerla_execute_test( NAME SetReductionTest4 COMMAND $<TARGET_FILE:SetReductio waLBerla_execute_test( NAME SetReductionTest5 COMMAND $<TARGET_FILE:SetReductionTest> PROCESSES 5 ) 
waLBerla_execute_test( NAME SetReductionTest27 COMMAND $<TARGET_FILE:SetReductionTest> PROCESSES 27 ) +if ( WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT ) + waLBerla_compile_test( Name MPIFloat16Test FILES mpi/MPIFloat16Test.cpp DEPENDS core ) + target_compile_features( MPIFloat16Test PUBLIC cxx_std_23 ) + waLBerla_execute_test( NAME MPIFloat16Test4 COMMAND $<TARGET_FILE:MPIFloat16Test> PROCESSES 4) +endif () ############## @@ -172,9 +177,6 @@ waLBerla_compile_test( FILES DebugSTLTest.cpp ) waLBerla_execute_test( NAME DebugSTLTest ) set_tests_properties(DebugSTLTest PROPERTIES WILL_FAIL TRUE) -waLBerla_compile_test( FILES FP16Test.cpp ) -waLBerla_execute_test( NAME FP16Test ) - waLBerla_compile_test( FILES FunctionTraitsTest.cpp ) waLBerla_execute_test( NAME FunctionTraitsTest ) @@ -235,4 +237,7 @@ if ( WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT ) # Which features are actually supported depend on the compiler target_compile_features( Float16SupportTest PUBLIC cxx_std_23 ) waLBerla_execute_test(NAME Float16SupportTest) + + waLBerla_compile_test( FILES FP16Test.cpp ) + waLBerla_execute_test( NAME FP16Test ) endif () \ No newline at end of file diff --git a/tests/core/FP16Test.cpp b/tests/core/FP16Test.cpp index 60a2be0eeee0872449f6a648fa1c65abbbda7f42..e08dd55b099c8edc36f97ab4f86a613d6671d1ac 100644 --- a/tests/core/FP16Test.cpp +++ b/tests/core/FP16Test.cpp @@ -70,7 +70,7 @@ void fp16Test( int argc, char ** argv ) const float16 y = -1.8f16; const float64 z = -0.6; WALBERLA_LOG_INFO_ON_ROOT(" + " << (double) x << " + " << (double) y << " == " << (float64) (x + y) << " ? 
") - WALBERLA_CHECK_FLOAT_EQUAL((float64) (x + y), z, "float16 addition does not work correctly."); + WALBERLA_CHECK_FLOAT_EQUAL( (x + y), (float16) z, "float16 addition does not work correctly."); #endif } diff --git a/tests/core/Float16SupportTest.cpp b/tests/core/Float16SupportTest.cpp index 04ea9378f54eee4ee78f81177fc609d732da21c5..5116886ff4e154cecb21465acc39e40caea776dc 100644 --- a/tests/core/Float16SupportTest.cpp +++ b/tests/core/Float16SupportTest.cpp @@ -19,14 +19,16 @@ // //====================================================================================================================== -#include <memory> -#include <numeric> - #include "core/DataTypes.h" #include "core/Environment.h" #include "core/logging/Logging.h" +#include <numeric> + namespace walberla::simple_Float16_test { + + +#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT using walberla::floatIsEqual; using walberla::real_t; using walberla::uint_c; @@ -90,12 +92,12 @@ void vector_test() fpDst_cast.assign( 10, (dst_t) 1.5 ); fp32.assign( 10, 1.5f ); std::copy( fpSrc.begin(), fpSrc.end(), fpDst.begin() ); - WALBERLA_LOG_WARNING_ON_ROOT( + WALBERLA_LOG_INFO_ON_ROOT( " std::vector.assign is not able to assign " << typeid( src_t ).name() << " values to container of type " << precisionType << ".\n" << " Therefore, the floating point value for assign must be cast beforehand or std::copy must be used, since it uses a static_cast internally." 
); - fpSrc[5] = 2.3; + fpSrc[5] = real_c(2.3); fpDst_cast[5] = (dst_t) 2.3; fp32[5] = 2.3f; fpDst[5] = (dst_t) 2.3; @@ -118,7 +120,7 @@ void vector_test() WALBERLA_CHECK_FLOAT_EQUAL( (dst_t)sumSrc, sumDst ); } { - fpSrc.assign( 13, 1.3 ); + fpSrc.assign(13, real_c(1.3)); std::copy( fpSrc.begin(), fpSrc.end(), fpDst.begin() ); const auto sumSrc = std::reduce(fpSrc.begin(), fpSrc.end()); const auto sumDst = std::reduce(fpDst.begin(), fpDst.end()); @@ -126,8 +128,11 @@ void vector_test() } } // simple_Float16_test::vector_test() +#endif // WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT + int main( int argc, char** argv ) { +#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT // This check only works since C++23 and is used in many implementations, so it's important, that it works. WALBERLA_CHECK( std::is_arithmetic< dst_t >::value ); @@ -149,15 +154,17 @@ int main( int argc, char** argv ) WALBERLA_LOG_INFO_ON_ROOT( " Start a where float32 is sufficient but float16 is not." ); WALBERLA_CHECK_FLOAT_UNEQUAL( dst_t(1.0)-dst_t(0.3), 1.0-0.3 ); WALBERLA_CHECK_FLOAT_EQUAL( 1.0f-0.3f, 1.0-0.3 ); +#else + WALBERLA_LOG_WARNING_ON_ROOT( "\nWALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT is not enabled. 
So this test cannot be run!\n" ) +#endif // WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT - return 0; + return EXIT_SUCCESS; } // simple_Float16_test::main() } // namespace walberla::simple_Float16_test int main( int argc, char** argv ) { - walberla::simple_Float16_test::main( argc, argv ); + return walberla::simple_Float16_test::main( argc, argv ); - return EXIT_SUCCESS; } // main() diff --git a/tests/core/mpi/MPIFloat16Test.cpp b/tests/core/mpi/MPIFloat16Test.cpp new file mode 100644 index 0000000000000000000000000000000000000000..be0bc5aa4f86eacc22f223c165f1dd943d5dbd56 --- /dev/null +++ b/tests/core/mpi/MPIFloat16Test.cpp @@ -0,0 +1,162 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file MPIFloat16.cpp +//! \ingroup core +//! \author Michael Zikeli <michael.zikeli@fau.de> +//! \brief This test is to check whether the self defined MPI_Datatype and the self defined Operators are working. +//! To verify the type, some parts of the BufferSystemTest are just copied. +//! To verify the operations, a simple AllReduce is executed for each operation. +//! For now only { SUM, MIN, MAX } are implemented, thus only those are tested. 
+// +//====================================================================================================================== + +#include "core/Abort.h" +#include "core/debug/TestSubsystem.h" +#include "core/logging/Logging.h" +#include "core/mpi/BufferSystem.h" +#include "core/mpi/Environment.h" +#include "core/mpi/Reduce.h" + + +namespace walberla::mpifloat16test +{ + +using mpi::BufferSystem; +using namespace std::literals::chrono_literals; + +#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT +void symmetricCommunication() +{ + const int MSG_SIZE = 10; + + auto mpiManager = MPIManager::instance(); + + const int numProcesses = mpiManager->numProcesses(); + const int rank = mpiManager->worldRank(); + const int leftNeighbor = (rank-1+numProcesses) % numProcesses; + const int rightNeighbor = (rank+1) % numProcesses; + + WALBERLA_CHECK_GREATER_EQUAL( numProcesses, 3 ); + + + BufferSystem bs ( MPI_COMM_WORLD ); + + // Pack Message to left neighbor containing own rank + for( int i=0; i< MSG_SIZE; ++i ) + bs.sendBuffer( leftNeighbor ) << numeric_cast<float16>(rank) + float16(0.3); + + // Pack Message to right neighbor containing own rank + for( int i=0; i< MSG_SIZE; ++i ) + bs.sendBuffer( rightNeighbor ) << numeric_cast<float16>(rank) - float16(0.3); + + bs.setReceiverInfoFromSendBufferState( true, false ); + bs.sendAll(); + + for( auto it = bs.begin(); it != bs.end(); ++it ) + { + WALBERLA_CHECK ( it.rank() == leftNeighbor || it.rank() == rightNeighbor ); + WALBERLA_CHECK_EQUAL( it.buffer().size(), MSG_SIZE * sizeof(float16) + MSG_SIZE * mpi::BUFFER_DEBUG_OVERHEAD ); + + auto receivedVal = float16(-1); + it.buffer() >> receivedVal; + + WALBERLA_CHECK_EQUAL( typeid(receivedVal), typeid(float16) ); + + if ( it.rank() == leftNeighbor ) + { + WALBERLA_CHECK_FLOAT_EQUAL( receivedVal, numeric_cast<float16>( it.rank() ) - float16(0.3) ); + WALBERLA_CHECK_FLOAT_UNEQUAL( receivedVal, numeric_cast<float16>( it.rank() ) + float16(0.3), 0.5); + } else { + 
WALBERLA_CHECK_FLOAT_EQUAL( receivedVal, numeric_cast<float16>( it.rank() ) + float16(0.3) ); + WALBERLA_CHECK_FLOAT_UNEQUAL( receivedVal, numeric_cast<float16>( it.rank() ) - float16(0.3), 0.5); + } + } + + WALBERLA_CHECK_EQUAL( bs.getBytesSent(), (MSG_SIZE * sizeof(float16) + MSG_SIZE * mpi::BUFFER_DEBUG_OVERHEAD) * 2 ); + WALBERLA_CHECK_EQUAL( bs.getBytesReceived(), (MSG_SIZE * sizeof(float16) + MSG_SIZE * mpi::BUFFER_DEBUG_OVERHEAD) * 2 ); +} + +void reduce( ) +{ + + auto mpiManager = MPIManager::instance(); + + const int numProcesses = mpiManager->numProcesses(); + const int rank = mpiManager->worldRank(); + + const auto startValue = numeric_cast<float16>(rank) + float16(0.3); + + // SUM + auto value = startValue; + + walberla::mpi::allReduceInplace( value, walberla::mpi::SUM ); + + auto sum = float16( 0.0 ); + for( int i = 0; i < numProcesses; ++i ) + sum += numeric_cast<float16>(i) + float16(0.3); + WALBERLA_CHECK_FLOAT_EQUAL( value, sum ); + WALBERLA_CHECK_FLOAT_UNEQUAL( value, ((numProcesses*(numProcesses-1)/2.)+0.3*numProcesses), 1e-10 ); + + // MIN + value = startValue; + + walberla::mpi::allReduceInplace( value, walberla::mpi::MIN ); + WALBERLA_CHECK_FLOAT_EQUAL( value, numeric_cast<float16>(0.3) ); + + // MAX + value = startValue; + + walberla::mpi::allReduceInplace( value, walberla::mpi::MAX ); + WALBERLA_CHECK_FLOAT_EQUAL( value, numeric_cast<float16>(numProcesses-1)+numeric_cast<float16>(0.3) ); + +} +#endif // WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT + +int main( int argc, char** argv ) +{ +#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT + mpi::Environment mpiEnv( argc, argv ); + debug::enterTestMode(); + walberla::logging::Logging::instance()->setLogLevel( walberla::logging::Logging::INFO ); + + auto mpiManager = MPIManager::instance(); + auto numProcesses = mpiManager->numProcesses(); + + if(numProcesses <= 2) + { + WALBERLA_ABORT("This test has to be executed on at least 3 processes. 
Executed on " << numProcesses); + return EXIT_FAILURE; + } + + WALBERLA_LOG_INFO_ON_ROOT("Testing Symmetric Communication..."); + symmetricCommunication(); + + WALBERLA_LOG_INFO_ON_ROOT("Testing Reduce operations..."); + reduce( ); +#else + WALBERLA_LOG_WARNING_ON_ROOT( "\nWALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT is not enabled. So this test cannot be run!\n" ) +#endif // WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT + + return EXIT_SUCCESS; + +} // mpifloat16test::main() + +} // namespace walberla::mpifloat16test + +int main( int argc, char** argv ) +{ + return walberla::mpifloat16test::main( argc, argv ); +} // main() diff --git a/tests/lbm_generated/CMakeLists.txt b/tests/lbm_generated/CMakeLists.txt index d7a7ef76bd6b9a605d19cd7e2bbdf4bedddbed7b..8ba33e735229aacb7a91f89f2f3912d8d1b61f85 100644 --- a/tests/lbm_generated/CMakeLists.txt +++ b/tests/lbm_generated/CMakeLists.txt @@ -6,8 +6,11 @@ waLBerla_link_files_to_builddir( "*.prm" ) waLBerla_link_files_to_builddir( "*.py" ) +if( WALBERLA_BUILD_WITH_CODEGEN ) + waLBerla_generate_target_from_python(NAME ExampleGenerated FILE Example.py + CODEGEN_CFG example_codegen OUT_FILES LBMStorageSpecification.h LBMStorageSpecification.cpp LBMSweepCollection.h LBMSweepCollection.cpp NoSlip.h NoSlip.cpp @@ -16,6 +19,36 @@ waLBerla_generate_target_from_python(NAME ExampleGenerated Example_InfoHeader.h) waLBerla_compile_test( FILES Example.cpp DEPENDS ExampleGenerated blockforest field lbm_generated timeloop ) +waLBerla_generate_target_from_python(NAME InterpolationNoSlipGenerated + FILE InterpolationNoSlip.py + CODEGEN_CFG interpolation_no_slip_codegen + OUT_FILES InterpolationNoSlipStorageSpecification.h InterpolationNoSlipStorageSpecification.cpp + InterpolationNoSlipSweepCollection.h InterpolationNoSlipSweepCollection.cpp + NoSlip.h NoSlip.cpp + NoSlipBouzidi.h NoSlipBouzidi.cpp + NoSlipQuadraticBB.h NoSlipQuadraticBB.cpp + UBB.h UBB.cpp + InterpolationNoSlipBoundaryCollection.h + InterpolationNoSlipHeader.h) + 
+waLBerla_compile_test( FILES InterpolationNoSlip.cpp DEPENDS InterpolationNoSlipGenerated blockforest core field geometry lbm_generated timeloop ) +# waLBerla_execute_test( NAME InterpolationNoSlip1 COMMAND $<TARGET_FILE:InterpolationNoSlip> ${CMAKE_CURRENT_SOURCE_DIR}/InterpolationNoSlip.prm -Parameters.distanceWall=0.1 ) +# waLBerla_execute_test( NAME InterpolationNoSlip2 COMMAND $<TARGET_FILE:InterpolationNoSlip> ${CMAKE_CURRENT_SOURCE_DIR}/InterpolationNoSlip.prm -Parameters.distanceWall=0.5 ) +waLBerla_execute_test( NAME InterpolationNoSlip3 COMMAND $<TARGET_FILE:InterpolationNoSlip> ${CMAKE_CURRENT_SOURCE_DIR}/InterpolationNoSlip.prm ) +endif() + +waLBerla_generate_target_from_python(NAME FreeSlipRefinementGenerated + FILE FreeSlipRefinement.py + CODEGEN_CFG free_slip_refinement_codegen + OUT_FILES FreeSlipRefinementStorageSpecification.h FreeSlipRefinementStorageSpecification.cpp + FreeSlipRefinementSweepCollection.h FreeSlipRefinementSweepCollection.cpp + FreeSlip.h FreeSlip.cpp + UBB.h UBB.cpp + Outflow.h Outflow.cpp + FreeSlipRefinementBoundaryCollection.h + FreeSlipRefinementInfoHeader.h) +waLBerla_compile_test( FILES FreeSlipRefinement.cpp DEPENDS FreeSlipRefinementGenerated blockforest field lbm_generated timeloop ) + if( WALBERLA_DOUBLE_ACCURACY ) waLBerla_compile_test( FILES LDC.cpp DEPENDS blockforest field lbm_generated timeloop ) endif() diff --git a/tests/lbm_generated/FreeSlipRefinement.cpp b/tests/lbm_generated/FreeSlipRefinement.cpp new file mode 100644 index 0000000000000000000000000000000000000000..42d91a6eecbd9ad8e25a88c9a4dd4c70986d347e --- /dev/null +++ b/tests/lbm_generated/FreeSlipRefinement.cpp @@ -0,0 +1,280 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. 
waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file FreeSlipRefinement.cpp +//! \author Markus Holzer <markus.holzer@fau.de> +//! \brief Channel flow with inlet and outlet on West and East. The rest of the Boundaries consist of FreeSlip. The +//! Channel flow should reach the inlet velocity in the whole domain because the FreeSlip BC will not provide a +//! restriction on it. With the D3Q27 stencil this only works if the BC is also set on the first fluid node +// +//====================================================================================================================== + +#include "blockforest/Initialization.h" +#include "blockforest/communication/NonUniformBufferedScheme.h" + +#include "core/DataTypes.h" +#include "core/Environment.h" +#include "core/debug/TestSubsystem.h" +#include "core/logging/Initialization.h" +#include "core/math/Vector3.h" +#include "core/timing/RemainingTimeLogger.h" + +#include "field/AddToStorage.h" +#include "field/FlagField.h" +#include "field/GhostLayerField.h" +#include "field/vtk/VTKWriter.h" + +#include "geometry/InitBoundaryHandling.h" + +#include "timeloop/SweepTimeloop.h" + +#include "lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h" +#include "lbm_generated/field/AddToStorage.h" +#include "lbm_generated/field/PdfField.h" +#include "lbm_generated/refinement/BasicRecursiveTimeStep.h" + +// 
include the generated header file. It includes all generated classes +#include "FreeSlipRefinementInfoHeader.h" + +using namespace walberla; +using namespace std::placeholders; + +using StorageSpecification_T = lbm::FreeSlipRefinementStorageSpecification; +using Stencil_T = StorageSpecification_T::Stencil; +using CommunicationStencil_T = StorageSpecification_T::CommunicationStencil; +using PdfField_T = lbm_generated::PdfField< StorageSpecification_T >; + +using SweepCollection_T = lbm::FreeSlipRefinementSweepCollection; + +using VectorField_T = GhostLayerField< real_t, StorageSpecification_T::Stencil::D >; +using ScalarField_T = GhostLayerField< real_t, 1 >; + +using flag_t = walberla::uint8_t; +using FlagField_T = FlagField< flag_t >; +using BoundaryCollection_T = lbm::FreeSlipRefinementBoundaryCollection< FlagField_T >; + +using RefinementSelectionFunctor = SetupBlockForest::RefinementSelectionFunction; + +class ChannelRefinement +{ + public: + ChannelRefinement(const uint_t depth) : refinementDepth_(depth){}; + + void operator()(SetupBlockForest& forest) + { + std::vector< SetupBlock* > blocks; + forest.getBlocks(blocks); + AABB refinementAABB = AABB(forest.getDomain().xSize() / 2 - 1, forest.getDomain().yMin(), forest.getDomain().zSize() / 2 - 1, + forest.getDomain().xSize() / 2 + 1, forest.getDomain().yMin() + 1, forest.getDomain().zSize() / 2 + 1 ); + + for (auto b : blocks) + { + if (refinementAABB.intersects(b->getAABB()) && b->getLevel() < refinementDepth_) + { + b->setMarker(true); + } + } + } + + private: + const uint_t refinementDepth_; +}; + +class Channel +{ + public: + Channel(const uint_t depth) : refinementDepth_(depth), freeSlipFlagUID_("FreeSlip"), ubbFlagUID_("UBB"), outflowFlagUID_("Outflow"){}; + + RefinementSelectionFunctor refinementSelector() { return ChannelRefinement(refinementDepth_); } + void setupBoundaryFlagField(StructuredBlockForest& sbfs, const BlockDataID flagFieldID) + { + for (auto bIt = sbfs.begin(); bIt != sbfs.end(); ++bIt) + 
{ + Block& b = dynamic_cast< Block& >(*bIt); + uint_t level = b.getLevel(); + auto flagField = b.getData< FlagField_T >(flagFieldID); + uint8_t freeSlipFlag = flagField->registerFlag(freeSlipFlagUID_); + uint8_t ubbFlag = flagField->registerFlag(ubbFlagUID_); + uint8_t outflowFlag = flagField->registerFlag(outflowFlagUID_); + for (auto cIt = flagField->beginWithGhostLayerXYZ(2); cIt != flagField->end(); ++cIt) + { + Cell localCell = cIt.cell(); + Cell globalCell(localCell); + sbfs.transformBlockLocalToGlobalCell(globalCell, b); + if (globalCell.x() < 0) { flagField->addFlag(localCell, ubbFlag); } + else if (globalCell.x() >= cell_idx_c(sbfs.getNumberOfXCells(level))) { flagField->addFlag(localCell, outflowFlag); } + else if (globalCell.y() >= cell_idx_c(sbfs.getNumberOfYCells(level))) { flagField->addFlag(localCell, freeSlipFlag); } + else if (globalCell.y() < cell_idx_c(1 << level)) {flagField->addFlag(localCell, freeSlipFlag);} + } + } + } + + private: + const std::string refinementProfile_; + const uint_t refinementDepth_; + + const FlagUID freeSlipFlagUID_; + const FlagUID ubbFlagUID_; + const FlagUID outflowFlagUID_; +}; + +static void createSetupBlockForest(SetupBlockForest& setupBfs, const Config::BlockHandle& domainSetup, Channel& setup) +{ + Vector3< real_t > domainSize = domainSetup.getParameter< Vector3< real_t > >("domainSize"); + Vector3< uint_t > rootBlocks = domainSetup.getParameter< Vector3< uint_t > >("rootBlocks"); + Vector3< bool > periodic = domainSetup.getParameter< Vector3< bool > >("periodic"); + + auto refSelection = setup.refinementSelector(); + setupBfs.addRefinementSelectionFunction(std::function< void(SetupBlockForest&) >(refSelection)); + AABB domain(real_t(0.0), real_t(0.0), real_t(0.0), domainSize[0], domainSize[1], domainSize[2]); + setupBfs.init(domain, rootBlocks[0], rootBlocks[1], rootBlocks[2], periodic[0], periodic[1], periodic[2]); + setupBfs.balanceLoad(blockforest::StaticLevelwiseCurveBalance(true), 
uint_c(MPIManager::instance()->numProcesses())); +} + +int main(int argc, char** argv) +{ + walberla::Environment walberlaEnv(argc, argv); + mpi::MPIManager::instance()->useWorldComm(); + + logging::configureLogging(walberlaEnv.config()); + + // read parameters + auto domainSetup = walberlaEnv.config()->getOneBlock("DomainSetup"); + auto parameters = walberlaEnv.config()->getOneBlock("Parameters"); + + const real_t omega = parameters.getParameter< real_t >("omega"); + const real_t inletVelocity = parameters.getParameter< real_t >("inletVelocity"); + const uint_t timesteps = parameters.getParameter< uint_t >("timesteps", uint_c(10)) + uint_c(1); + const uint_t refinementDepth = parameters.getParameter< uint_t >("refinementDepth", uint_c(1)); + + auto loggingParameters = walberlaEnv.config()->getOneBlock("Logging"); + bool writeSetupForestAndReturn = loggingParameters.getParameter<bool>("writeSetupForestAndReturn", false); + + auto remainingTimeLoggerFrequency = + parameters.getParameter< real_t >("remainingTimeLoggerFrequency", real_c(3.0)); // in seconds + + auto flowSetup = std::make_shared< Channel >(refinementDepth); + + SetupBlockForest setupBfs; + WALBERLA_LOG_INFO_ON_ROOT("Generating SetupBlockForest...") + createSetupBlockForest(setupBfs, domainSetup, *flowSetup); + + // Create structured block forest + Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock"); + + WALBERLA_LOG_INFO_ON_ROOT("Creating structured block forest...") + auto bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs); + auto blocks = std::make_shared< StructuredBlockForest >(bfs, cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2]); + blocks->createCellBoundingBoxes(); + + if (writeSetupForestAndReturn) + { + WALBERLA_LOG_INFO_ON_ROOT("Writing SetupBlockForest to VTK file") + WALBERLA_ROOT_SECTION() { vtk::writeDomainDecomposition(blocks, "FreeSlipRefinementDomainDecomposition", "vtk_out"); } + + 
WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << setupBfs.getNumberOfBlocks()) + for (uint_t level = 0; level <= refinementDepth; level++) + { + WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << setupBfs.getNumberOfBlocks(level)) + } + WALBERLA_LOG_INFO_ON_ROOT("Ending program") + return EXIT_SUCCESS; + } + + WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << setupBfs.getNumberOfBlocks()) + for (uint_t level = 0; level <= refinementDepth; level++) + { + WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << setupBfs.getNumberOfBlocks(level)) + } + + StorageSpecification_T StorageSpec = StorageSpecification_T(); + BlockDataID pdfFieldId = lbm_generated::addPdfFieldToStorage(blocks, "pdf field", StorageSpec, uint_c(2)); + BlockDataID velFieldId = field::addToStorage< VectorField_T >(blocks, "Velocity", real_c(0.0), field::fzyx); + + BlockDataID flagFieldId = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field", uint_c(3)); + + SweepCollection_T sweepCollection(blocks, pdfFieldId, velFieldId, omega); + for (auto& block : *blocks) + { + sweepCollection.initialise(&block); + } + + const FlagUID fluidFlagUID("Fluid"); + flowSetup->setupBoundaryFlagField(*blocks, flagFieldId); + geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldId, fluidFlagUID, 2); + BoundaryCollection_T boundaryCollection(blocks, flagFieldId, pdfFieldId, fluidFlagUID, inletVelocity); + + WALBERLA_LOG_INFO_ON_ROOT("Setting up communication...") + auto comm = + std::make_shared< blockforest::communication::NonUniformBufferedScheme< CommunicationStencil_T > >(blocks); + auto packInfo = lbm_generated::setupNonuniformPdfCommunication< PdfField_T >(blocks, pdfFieldId); + comm->addPackInfo(packInfo); + + lbm_generated::BasicRecursiveTimeStep< PdfField_T, SweepCollection_T, BoundaryCollection_T > timestep( + blocks, pdfFieldId, sweepCollection, boundaryCollection, comm, packInfo); + + SweepTimeloop timeloop(blocks->getBlockStorage(), timesteps); + uint_t 
vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0); + if (vtkWriteFrequency > 0) + { + auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "FreeSlipRefinementVTK", vtkWriteFrequency, 0, false, "vtk_out", + "simulation_step", false, true, true, false, 0); + + auto velWriter = make_shared< field::VTKWriter< VectorField_T > >(velFieldId, "velocity"); + vtkOutput->addBeforeFunction([&]() { + for (auto& block : *blocks) + { + sweepCollection.calculateMacroscopicParameters(&block); + } + }); + + vtkOutput->addCellDataWriter(velWriter); + timeloop.addFuncBeforeTimeStep(vtk::writeFiles(vtkOutput), "VTK Output"); + } + timeloop.addFuncAfterTimeStep(timestep); + + // log remaining time + if (remainingTimeLoggerFrequency > 1.0) + { + timeloop.addFuncAfterTimeStep(timing::RemainingTimeLogger(timeloop.getNrOfTimeSteps(), remainingTimeLoggerFrequency), + "remaining time logger"); + } + + WALBERLA_LOG_INFO_ON_ROOT("Starting Simulation with " << timesteps << " timesteps") + + timeloop.run(); + + + for (auto& block : *blocks) + { + sweepCollection.calculateMacroscopicParameters(&block); + Block& b = dynamic_cast< Block& >(block); + uint_t level = b.getLevel(); + + auto velField = b.getData< VectorField_T >(velFieldId); + for( auto it = velField->beginXYZ(); it != velField->end(); ++it ) + { + Cell localCell = it.cell(); + Cell globalCell(localCell); + blocks->transformBlockLocalToGlobalCell(globalCell, b); + + if (globalCell.y() >= (cell_idx_c(1 << level))) + { + WALBERLA_CHECK_FLOAT_EQUAL_EPSILON(it.getF(0), inletVelocity, real_c(1e-5)); + } + } + } + return EXIT_SUCCESS; +} diff --git a/tests/lbm_generated/FreeSlipRefinement.prm b/tests/lbm_generated/FreeSlipRefinement.prm new file mode 100644 index 0000000000000000000000000000000000000000..9149c34d4ab19ff71af337e5be8ad80e2d1e714d --- /dev/null +++ b/tests/lbm_generated/FreeSlipRefinement.prm @@ -0,0 +1,26 @@ +Parameters +{ + omega 1.95; + inletVelocity 0.05; + timesteps 1000; + refinementDepth 1; 
+ + remainingTimeLoggerFrequency 0; // in seconds + vtkWriteFrequency 0; +} + +DomainSetup +{ + domainSize <32, 16, 16>; + rootBlocks <4, 2, 2>; + + cellsPerBlock < 8, 8, 8 >; + periodic < 0, 0, 1 >; +} + +Logging +{ + logLevel info; // info progress detail tracing + writeSetupForestAndReturn false; +} + diff --git a/tests/lbm_generated/FreeSlipRefinement.py b/tests/lbm_generated/FreeSlipRefinement.py new file mode 100644 index 0000000000000000000000000000000000000000..e695a8aedddea69bd9b8656de04eb87b76ce64fe --- /dev/null +++ b/tests/lbm_generated/FreeSlipRefinement.py @@ -0,0 +1,49 @@ +import sympy as sp + +from pystencils import Target +from pystencils import fields + +from lbmpy.advanced_streaming.utility import get_timesteps +from lbmpy.boundaries import FreeSlip, UBB, ExtrapolationOutflow +from lbmpy.creationfunctions import create_lb_method, create_lb_collision_rule +from lbmpy import LBMConfig, LBMOptimisation, Stencil, Method, LBStencil +from pystencils_walberla import CodeGeneration, generate_info_header +from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator + +with CodeGeneration() as ctx: + target = Target.CPU # Target.GPU if ctx.cuda else Target.CPU + data_type = "float64" if ctx.double_accuracy else "float32" + + streaming_pattern = 'pull' + timesteps = get_timesteps(streaming_pattern) + + omega = sp.symbols("omega") + + stencil = LBStencil(Stencil.D3Q27) + pdfs, vel_field = fields(f"pdfs({stencil.Q}), velocity({stencil.D}): {data_type}[{stencil.D}D]", + layout='fzyx') + + macroscopic_fields = {'velocity': vel_field} + + lbm_config = LBMConfig(stencil=stencil, method=Method.SRT, relaxation_rate=omega, + streaming_pattern=streaming_pattern) + lbm_opt = LBMOptimisation(cse_global=False, field_layout='fzyx') + + method = create_lb_method(lbm_config=lbm_config) + collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt) + + freeslip = lbm_boundary_generator("FreeSlip", flag_uid="FreeSlip", 
boundary_object=FreeSlip(stencil)) + ubb = lbm_boundary_generator(class_name='UBB', flag_uid='UBB', + boundary_object=UBB([sp.Symbol("u_x"), 0.0, 0.0], data_type=data_type)) + outflow = lbm_boundary_generator(class_name='Outflow', flag_uid='Outflow', + boundary_object=ExtrapolationOutflow(stencil[4], method), + field_data_type=data_type) + + generate_lbm_package(ctx, name="FreeSlipRefinement", + collision_rule=collision_rule, + lbm_config=lbm_config, lbm_optimisation=lbm_opt, + nonuniform=True, boundaries=[freeslip, ubb, outflow], + macroscopic_fields=macroscopic_fields, + data_type=data_type, pdfs_data_type=data_type) + + generate_info_header(ctx, 'FreeSlipRefinementInfoHeader') diff --git a/tests/lbm_generated/InterpolationNoSlip.cpp b/tests/lbm_generated/InterpolationNoSlip.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cfbe99fcc87ba52fab85461a65326148af85a5da --- /dev/null +++ b/tests/lbm_generated/InterpolationNoSlip.cpp @@ -0,0 +1,191 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file InterpolatedNoSlip.cpp +//! \author Markus Holzer <markus.holzer@fau.de> +//! \brief Couette flow driven by a UBB BC in Nord and wall boundary in the South. 
Remaining directions are periodic +//! If Interpolation BC are used the distance of the southern wall can be controlled. The velocity in the +//! first fluid cell is checked and compared with the velocity obtained from a default NoSlip BC. +//! Depending on the set distance for the interpolation BCs the velocity is expected to be higher or lower +// +//====================================================================================================================== +#include "blockforest/Initialization.h" +#include "blockforest/communication/UniformBufferedScheme.h" + +#include "core/DataTypes.h" +#include "core/Environment.h" +#include "core/debug/TestSubsystem.h" +#include "core/logging/Initialization.h" +#include "core/math/Vector3.h" +#include "core/timing/RemainingTimeLogger.h" + +#include "field/AddToStorage.h" +#include "field/FlagField.h" +#include "field/GhostLayerField.h" +#include "field/vtk/VTKWriter.h" + +#include "geometry/InitBoundaryHandling.h" + +#include "timeloop/SweepTimeloop.h" + +#include "lbm_generated/communication/UniformGeneratedPdfPackInfo.h" +#include "lbm_generated/field/AddToStorage.h" +#include "lbm_generated/field/PdfField.h" + +// include the generated header file. 
It includes all generated classes +#include "InterpolationNoSlipHeader.h" + +using namespace walberla; +using namespace std::placeholders; + +using StorageSpecification_T = lbm::InterpolationNoSlipStorageSpecification; +using Stencil_T = StorageSpecification_T::Stencil; +using CommunicationStencil_T = StorageSpecification_T::CommunicationStencil; +using PdfField_T = lbm_generated::PdfField< StorageSpecification_T >; +using PackInfo_T = lbm_generated::UniformGeneratedPdfPackInfo< PdfField_T >; + +using SweepCollection_T = lbm::InterpolationNoSlipSweepCollection; + +using VectorField_T = GhostLayerField< real_t, StorageSpecification_T::Stencil::D >; +using ScalarField_T = GhostLayerField< real_t, 1 >; + +using flag_t = walberla::uint8_t; +using FlagField_T = FlagField< flag_t >; +using BoundaryCollection_T = lbm::InterpolationNoSlipBoundaryCollection< FlagField_T >; + +using blockforest::communication::UniformBufferedScheme; + +class wallDistance +{ + public: + wallDistance(const real_t q) : q_(q) {} + + real_t operator()(const Cell& fluidCell, const Cell& boundaryCell, const shared_ptr< StructuredBlockForest >& SbF, + IBlock& block) const; + + private: + const real_t q_; +}; // class wallDistance + +real_t wallDistance::operator()(const Cell& /*fluidCell*/, const Cell& /*boundaryCell*/, + const shared_ptr< StructuredBlockForest >& /*SbF*/, IBlock& /*block*/) const +{ + return q_; +} + +int main(int argc, char** argv) +{ + debug::enterTestMode(); + walberla::Environment walberlaEnv(argc, argv); + logging::configureLogging(walberlaEnv.config()); + + auto blocks = blockforest::createUniformBlockGridFromConfig(walberlaEnv.config()); + + auto domainSetup = walberlaEnv.config()->getOneBlock("DomainSetup"); + Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock"); + + // read parameters + auto parameters = walberlaEnv.config()->getOneBlock("Parameters"); + const real_t omega = parameters.getParameter< real_t >("omega", 
real_c(1.4)); + const real_t distanceWall = parameters.getParameter< real_t >("distanceWall", real_c(0.5)); + const uint_t timesteps = parameters.getParameter< uint_t >("timesteps", uint_c(10)) + uint_c(1); + + WALBERLA_LOG_DEVEL_VAR(distanceWall) + + auto remainingTimeLoggerFrequency = + parameters.getParameter< real_t >("remainingTimeLoggerFrequency", real_c(3.0)); // in seconds + + const StorageSpecification_T StorageSpec = StorageSpecification_T(); + BlockDataID pdfFieldId = lbm_generated::addPdfFieldToStorage(blocks, "pdf field", StorageSpec, uint_c(1)); + BlockDataID velFieldId = field::addToStorage< VectorField_T >(blocks, "Velocity", real_c(0.0), field::fzyx); + + BlockDataID flagFieldId = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field", uint_c(1)); + + SweepCollection_T sweepCollection(blocks, pdfFieldId, velFieldId, omega); + for (auto& block : *blocks) + { + sweepCollection.initialise(&block); + } + + const FlagUID fluidFlagUID("Fluid"); + auto boundariesConfig = walberlaEnv.config()->getBlock("Boundaries"); + geometry::initBoundaryHandling< FlagField_T >(*blocks, flagFieldId, boundariesConfig); + geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldId, fluidFlagUID); + + const wallDistance wallDistanceCallback{ distanceWall }; + std::function< real_t(const Cell&, const Cell&, const shared_ptr< StructuredBlockForest >&, IBlock&) > + wallDistanceFunctor = wallDistanceCallback; + // For the BoundaryCollection a funcotr to calculate the wall distance for the Bouzidi NoSlip and for the QuadraticBB + // have to be provided. 
In this test case we use the same function to calculate the wall distance + BoundaryCollection_T boundaryCollection(blocks, flagFieldId, pdfFieldId, fluidFlagUID, omega, wallDistanceFunctor, + wallDistanceFunctor); + + auto packInfo = std::make_shared< lbm_generated::UniformGeneratedPdfPackInfo< PdfField_T > >(pdfFieldId); + UniformBufferedScheme< Stencil_T > communication(blocks); + communication.addPackInfo(packInfo); + + SweepTimeloop timeloop(blocks->getBlockStorage(), timesteps); + timeloop.add() << BeforeFunction(communication, "communication") + << Sweep(boundaryCollection.getSweep(BoundaryCollection_T::ALL), "Boundary Conditions"); + timeloop.add() << Sweep(sweepCollection.streamCollide(SweepCollection_T::ALL), "LBM StreamCollide"); + + uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0); + if (vtkWriteFrequency > 0) + { + auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "InterpolationNoSlipVTK", vtkWriteFrequency, 0, false, + "vtk_out", "simulation_step", false, true, true, false, 0); + + auto velWriter = make_shared< field::VTKWriter< VectorField_T > >(velFieldId, "velocity"); + vtkOutput->addBeforeFunction([&]() { + for (auto& block : *blocks) + { + sweepCollection.calculateMacroscopicParameters(&block); + } + }); + + vtkOutput->addCellDataWriter(velWriter); + timeloop.addFuncBeforeTimeStep(vtk::writeFiles(vtkOutput), "VTK Output"); + } + + if (remainingTimeLoggerFrequency > 0) + { + // log remaining time + timeloop.addFuncAfterTimeStep( + timing::RemainingTimeLogger(timeloop.getNrOfTimeSteps(), remainingTimeLoggerFrequency), + "remaining time logger"); + } + + timeloop.run(); + + // This is the velocity at the wall, when a NoSlip BC is used. This is similar to using an interpolation BC with a + // wall distance of 0.5. 
This value can be obtained by either setting distanceWall to 0.5 in the Parameter file or + // specifying the NoSlip BC at the southern boundary + const real_t defaultNoSlipVelocity = real_c(0.0002); + + for (auto& block : *blocks) + { + sweepCollection.calculateMacroscopicParameters(&block); + + auto velField = block.getData< VectorField_T >(velFieldId); + auto velAtWall = velField->get(cell_idx_c(cellsPerBlock[0] / 2), 0, cell_idx_c(cellsPerBlock[2] / 2), 0); + // WALBERLA_LOG_DEVEL_VAR(velAtWall) + + if (distanceWall > 0.49 && distanceWall < 0.51) { WALBERLA_CHECK_FLOAT_EQUAL(velAtWall, defaultNoSlipVelocity) } + else if (distanceWall < 0.49) { WALBERLA_CHECK_GREATER(defaultNoSlipVelocity, velAtWall) } + else if (distanceWall > 0.51) { WALBERLA_CHECK_LESS(defaultNoSlipVelocity, velAtWall) } + } + + return EXIT_SUCCESS; +} diff --git a/tests/lbm_generated/InterpolationNoSlip.prm b/tests/lbm_generated/InterpolationNoSlip.prm new file mode 100644 index 0000000000000000000000000000000000000000..4b15df27eb89235af707a5209abb37c31da50c61 --- /dev/null +++ b/tests/lbm_generated/InterpolationNoSlip.prm @@ -0,0 +1,30 @@ +Parameters +{ + omega 1.1; + timesteps 5000; + distanceWall 0.9; + + remainingTimeLoggerFrequency 0; // in seconds + vtkWriteFrequency 0; +} + +DomainSetup +{ + blocks < 1, 1, 1 >; + cellsPerBlock < 50, 25, 25 >; + periodic < 1, 0, 1 >; +} + +Boundaries +{ + // Border { direction S; walldistance -1; flag NoSlip; } + // Border { direction S; walldistance -1; flag NoSlipBouzidi; } + Border { direction S; walldistance -1; flag NoSlipQuadraticBB; } + Border { direction N; walldistance -1; flag UBB; } +} + + +Logging +{ + logLevel info; // info progress detail tracing +} diff --git a/tests/lbm_generated/InterpolationNoSlip.py b/tests/lbm_generated/InterpolationNoSlip.py new file mode 100644 index 0000000000000000000000000000000000000000..62463033edaab74efff765a7e412a273b44bccbf --- /dev/null +++ b/tests/lbm_generated/InterpolationNoSlip.py @@ -0,0 +1,53 @@ 
"""Code generation for the InterpolationNoSlip test.

Generates a D3Q27 SRT lattice Boltzmann package with four boundary
conditions (plain NoSlip, Bouzidi NoSlip, quadratic bounce-back NoSlip,
and a UBB inflow) plus the accompanying info header.
"""
import warnings

import sympy as sp

from pystencils import Target
from pystencils import fields

from lbmpy.advanced_streaming.utility import get_timesteps
from lbmpy.boundaries import NoSlip, NoSlipLinearBouzidi, QuadraticBounceBack, UBB
from lbmpy.creationfunctions import create_lb_method, create_lb_collision_rule
from lbmpy import LBMConfig, LBMOptimisation, Stencil, Method, LBStencil
from pystencils_walberla import CodeGeneration, generate_info_header
from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator

# Silence deprecation chatter from the code-generation stack.
warnings.filterwarnings("ignore")

with CodeGeneration() as ctx:
    target = Target.CPU  # Target.GPU if ctx.cuda else Target.CPU
    data_type = "float64" if ctx.double_accuracy else "float32"

    streaming_pattern = 'pull'
    timesteps = get_timesteps(streaming_pattern)

    # Relaxation rate stays symbolic; the app supplies it at runtime.
    omega = sp.symbols("omega")

    stencil = LBStencil(Stencil.D3Q27)
    pdfs, vel_field = fields(
        f"pdfs({stencil.Q}), velocity({stencil.D}): {data_type}[{stencil.D}D]",
        layout='fzyx')

    macroscopic_fields = {'velocity': vel_field}

    lbm_config = LBMConfig(
        stencil=stencil, method=Method.SRT, relaxation_rate=omega,
        streaming_pattern=streaming_pattern)
    lbm_opt = LBMOptimisation(cse_global=False, field_layout='fzyx')

    method = create_lb_method(lbm_config=lbm_config)
    collision_rule = create_lb_collision_rule(
        lbm_config=lbm_config, lbm_optimisation=lbm_opt)

    # One generator per wall treatment; the test picks among them via flags.
    no_slip = lbm_boundary_generator(
        class_name='NoSlip', flag_uid='NoSlip',
        boundary_object=NoSlip())
    no_slip_bouzidi = lbm_boundary_generator(
        class_name='NoSlipBouzidi', flag_uid='NoSlipBouzidi',
        boundary_object=NoSlipLinearBouzidi(data_type=data_type))
    no_slip_quadraticbb = lbm_boundary_generator(
        class_name='NoSlipQuadraticBB', flag_uid='NoSlipQuadraticBB',
        boundary_object=QuadraticBounceBack(omega, data_type=data_type))
    ubb = lbm_boundary_generator(
        class_name='UBB', flag_uid='UBB',
        boundary_object=UBB([0.01, 0, 0], data_type=data_type))

    boundaries = [no_slip, no_slip_bouzidi, no_slip_quadraticbb, ubb]

    generate_lbm_package(
        ctx, name="InterpolationNoSlip",
        collision_rule=collision_rule,
        lbm_config=lbm_config, lbm_optimisation=lbm_opt,
        nonuniform=True, boundaries=boundaries,
        macroscopic_fields=macroscopic_fields, data_type=data_type)

    generate_info_header(ctx, 'InterpolationNoSlipHeader')