diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 27b8999837b423911c0e036580b9eeeca3e72fee..c4dc0ffe6292e737ea319162a0efd7106a1a32fb 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -51,6 +51,7 @@ stages: -DWALBERLA_BUILD_WITH_METIS=$WALBERLA_BUILD_WITH_METIS -DWALBERLA_BUILD_WITH_PARMETIS=$WALBERLA_BUILD_WITH_PARMETIS -DWALBERLA_BUILD_WITH_FFTW=$WALBERLA_BUILD_WITH_FFTW + -DWALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT=$WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT -DWALBERLA_ENABLE_GUI=$WALBERLA_ENABLE_GUI -DWALBERLA_BUILD_WITH_CODEGEN=$WALBERLA_BUILD_WITH_CODEGEN -DWALBERLA_STL_BOUNDS_CHECKS=$WALBERLA_STL_BOUNDS_CHECKS @@ -769,6 +770,7 @@ gcc_13_serial: WALBERLA_BUILD_WITH_PARMETIS: "OFF" WALBERLA_BUILD_WITH_CODEGEN: "ON" WALBERLA_BUILD_WITH_PYTHON: "ON" + WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT: "ON" only: variables: - $ENABLE_NIGHTLY_BUILDS @@ -791,6 +793,7 @@ gcc_13_mpionly: WALBERLA_BUILD_WITH_OPENMP: "OFF" WALBERLA_BUILD_WITH_CODEGEN: "ON" WALBERLA_BUILD_WITH_PYTHON: "ON" + WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT: "ON" only: variables: - $ENABLE_NIGHTLY_BUILDS @@ -813,6 +816,7 @@ gcc_13_hybrid: WALBERLA_BUILD_WITH_CUDA: "ON" WALBERLA_BUILD_WITH_CODEGEN: "ON" WALBERLA_BUILD_WITH_PYTHON: "ON" + WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT: "ON" tags: - cuda11 - docker @@ -835,6 +839,7 @@ gcc_13_serial_dbg: CMAKE_BUILD_TYPE: "DebugOptimized" WALBERLA_BUILD_WITH_CODEGEN: "ON" WALBERLA_BUILD_WITH_PYTHON: "ON" + WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT: "ON" tags: - cuda11 - docker @@ -855,6 +860,7 @@ gcc_13_mpionly_dbg: WALBERLA_BUILD_WITH_OPENMP: "OFF" WALBERLA_BUILD_WITH_CODEGEN: "ON" WALBERLA_BUILD_WITH_PYTHON: "ON" + WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT: "ON" tags: - cuda11 - docker @@ -874,6 +880,7 @@ gcc_13_hybrid_dbg: CMAKE_BUILD_TYPE: "DebugOptimized" WALBERLA_BUILD_WITH_CODEGEN: "ON" WALBERLA_BUILD_WITH_PYTHON: "ON" + WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT: "ON" tags: - cuda11 - docker @@ -896,6 +903,7 @@ gcc_13_hybrid_dbg_sp: 
WALBERLA_BUILD_WITH_METIS: "OFF" WALBERLA_BUILD_WITH_CODEGEN: "ON" WALBERLA_BUILD_WITH_PYTHON: "ON" + WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT: "ON" tags: - cuda11 - docker diff --git a/CMakeLists.txt b/CMakeLists.txt index 5eae2503b9bd414976b0cbfce5478d2af4ea4a54..1d21694af654ba0b996a8bbce0c3096ef6a125ab 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1276,12 +1276,6 @@ if ( WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT ) message( FATAL_ERROR "Compiler: ${CMAKE_CXX_COMPILER} Version: ${CMAKE_CXX_COMPILER_VERSION} does not support half precision" ) endif () - # Local host optimization - if ( NOT WALBERLA_OPTIMIZE_FOR_LOCALHOST ) - message( WARNING "[WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT] " - "You are not optimizing for localhost. You may encounter linker errors, or WORSE: silent incorrect fp16 arithmetic! Consider also enabling WALBERLA_OPTIMIZE_FOR_LOCALHOST!" ) - endif () # Local host check - endif () # Check if WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT is set ############################################################################################################################ diff --git a/src/core/DataTypes.h b/src/core/DataTypes.h index 4e7c019a86dcf0ce23fb7c8a9e66e6add8500bde..d6147c12be61a072f112e49de6e68001a6013abc 100644 --- a/src/core/DataTypes.h +++ b/src/core/DataTypes.h @@ -167,6 +167,7 @@ using real_t = double; using real_t = float; #endif +#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT /// Half precision support. Experimental. Use carefully. /// /// This feature is experimental, since it strictly depends on the underlying architecture and compiler support. @@ -174,7 +175,6 @@ using real_t = float; /// interchange. Arithmetic operations will likely involve casting to fp32 (C++ float) and truncation to fp16. /// Only bandwidth bound code may therefore benefit. None of this is guaranteed, and may change in the future. 
/// -#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT /// FIXME: (not really right) Clang version must be 15 or higher for x86 half precision support. /// FIXME: (not really right) GCC version must be 12 or higher for x86 half precision support. /// FIXME: (I don't know) Also support seems to require SSE, so ensure that respective instruction sets are enabled. @@ -202,7 +202,7 @@ using half = _Float16; // Another possible half precision format would be the one from Google Brain (bfloat16) with an 8 bit exponent and a 7 bit mantissa. // Compare https://i10git.cs.fau.de/ab04unyc/walberla/-/issues/23 using float16 = half; -#endif +#endif // WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT using float32 = float; using float64 = double; @@ -276,7 +276,7 @@ inline bool floatIsEqual( walberla::float16 lhs, walberla::float16 rhs, const wa const auto difference = lhs - rhs; return ( (difference < 0) ? -difference : difference ) < epsilon; } -#endif +#endif // WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT } // namespace walberla diff --git a/src/core/mpi/CMakeLists.txt b/src/core/mpi/CMakeLists.txt index 13cc3653bd41f03e1840abd09f71147eee3c57cd..2987e28c266bce6552659cce89f5ef9546927a76 100644 --- a/src/core/mpi/CMakeLists.txt +++ b/src/core/mpi/CMakeLists.txt @@ -19,11 +19,14 @@ target_sources( core MPIIO.h MPIManager.cpp MPIManager.h + MPIOperation.h MPITextFile.cpp MPITextFile.h + MPIWrapper.cpp MPIWrapper.h OpenMPBufferSystem.h OpenMPBufferSystem.impl.h + Operation.h RecvBuffer.h Reduce.h SendBuffer.h diff --git a/src/core/mpi/Datatype.h b/src/core/mpi/Datatype.h index f717cb6d94c661aec320a864c972dbba4a49d2ae..ac933bbe8c1e572520d0e67f75f8199c1adc8910 100644 --- a/src/core/mpi/Datatype.h +++ b/src/core/mpi/Datatype.h @@ -22,6 +22,7 @@ #pragma once #include "MPIWrapper.h" +#include "core/Abort.h" namespace walberla { @@ -43,6 +44,21 @@ namespace mpi { WALBERLA_MPI_SECTION() { MPI_Type_commit( &mpiDatatype_ ); } } + explicit Datatype(const uint_t byteSize) : 
mpiDatatype_(MPI_DATATYPE_NULL) + { + WALBERLA_MPI_SECTION() + { + if (MPI_Type_contiguous(int_c(byteSize), MPI_BYTE, &mpiDatatype_) != MPI_SUCCESS) + { + WALBERLA_ABORT("MPI_Type_contiguous " << typeid(mpiDatatype_).name() << " failed."); + } + if (MPI_Type_commit(&mpiDatatype_) != MPI_SUCCESS) + { + WALBERLA_ABORT("MPI_Type_commit " << typeid(mpiDatatype_).name() << " failed."); + } + } + } + void init( MPI_Datatype datatype ) { mpiDatatype_ = datatype; diff --git a/src/core/mpi/MPIManager.cpp b/src/core/mpi/MPIManager.cpp index a334bc16c4878cea58a1452cd083fba31d9d3c7e..c25ca1082277d89c8be486ae7a9c61350d6bfea0 100644 --- a/src/core/mpi/MPIManager.cpp +++ b/src/core/mpi/MPIManager.cpp @@ -119,6 +119,10 @@ void MPIManager::finalizeMPI() { WALBERLA_MPI_SECTION() { + /// Free the custom types and operators + customMPITypes_.clear(); + customMPIOperations_.clear(); + if (isMPIInitialized_ && !currentlyAborting_) { isMPIInitialized_ = false; diff --git a/src/core/mpi/MPIManager.h b/src/core/mpi/MPIManager.h index 9ba3fb4d04b8f6b0c7f1e2041454677b9156bf75..60ce4d8514e57bb2465a14dfa304cab73b3b8be6 100644 --- a/src/core/mpi/MPIManager.h +++ b/src/core/mpi/MPIManager.h @@ -18,23 +18,28 @@ //! \author Florian Schornbaum <florian.schornbaum@fau.de> //! \author Martin Bauer <martin.bauer@fau.de> //! \author Christian Godenschwager <christian.godenschwager@fau.de> +//! 
\author Michael Zikeli <michael.zikeli@fau.de> // //====================================================================================================================== #pragma once -#include "MPIWrapper.h" #include "core/DataTypes.h" #include "core/debug/Debug.h" #include "core/math/Uint.h" +#include "core/mpi/Datatype.h" +#include "core/mpi/MPIOperation.h" +#include "core/mpi/MPIWrapper.h" +#include "core/mpi/Operation.h" #include "core/singleton/Singleton.h" +#include <map> +#include <typeindex> namespace walberla { namespace mpi { - /** * Encapsulates MPI Rank/Communicator information * @@ -127,6 +132,87 @@ public: /// Indicates whether MPI-IO can be used with the current MPI communicator; certain versions of OpenMPI produce /// segmentation faults when using MPI-IO with a 3D Cartesian MPI communicator (see waLBerla issue #73) bool isCommMPIIOValid() const; + + /// Return the custom MPI_Datatype stored in 'customMPITypes_' and defined by the user and passed to 'commitCustomType'. + template< typename CType > + MPI_Datatype getCustomType() const + { + WALBERLA_MPI_SECTION() + { + return customMPITypes_.at(typeid(CType)).operator MPI_Datatype(); + } + WALBERLA_NON_MPI_SECTION() + { + WALBERLA_ABORT( "This should not be called, if waLBerla is compiled without MPI." ); + } + } + + /// Return the custom MPI_Op stored in 'customMPIOperation_' and defined by the user and passed to 'commitCustomOperation'. + template< typename CType > + MPI_Op getCustomOperation(mpi::Operation op) const + { + // FIXME the operation is actually type dependent but implementing this is not straightforward, + // compare comment at declaration of 'customMPIOperations_'. + WALBERLA_MPI_SECTION() + { + return customMPIOperations_.at(op).operator MPI_Op(); + } + WALBERLA_NON_MPI_SECTION() + { + WALBERLA_ABORT( "This should not be called, if waLBerla is compiled without MPI." 
); + } + } + //@} + //******************************************************************************************************************* + + //** Setter Functions ********************************************************************************************** + /*! \name Setter Function */ + //@{ + ///! \brief Initializes a custom MPI_Datatype and logs it in the customMPITypes_ map. + ///! \param argument The argument that is expected by the constructor of mpi::Datatype + /// At the point of creation 26.01.2024 this is either MPI_Datatype or const int. + template < typename CType, class ConstructorArgumentType > + void commitCustomType( ConstructorArgumentType& argument ) + { + WALBERLA_MPI_SECTION() + { + if (isMPIInitialized_ && !currentlyAborting_) + { + static_assert( std::is_same_v<ConstructorArgumentType, const int> || std::is_same_v<ConstructorArgumentType, MPI_Datatype>, + "mpi::Datatype has only an constructor for an int value or a MPI_Datatype." ); + [[maybe_unused]] auto worked = std::get< 1 >( customMPITypes_.try_emplace(typeid(CType), argument) ); + WALBERLA_ASSERT(worked, "Emplacement of type " << typeid(CType).name() << " did not work."); + } else { + WALBERLA_ABORT( "MPI must be initialized before an new MPI_Datatype can be committed." ); + } + } + WALBERLA_NON_MPI_SECTION() + { + WALBERLA_ABORT( "This should not be called, if waLBerla is compiled without MPI." ); + } + } + + ///! \brief Initializes a custom MPI_Op and logs it in the customMPIOperation map + ///! \param op A operator, e.g. SUM, MIN. + ///! \param fct The definition of the MPI_User_function used for this operator. 
+ template< typename CType > + void commitCustomOperation( mpi::Operation op, MPI_User_function* fct ) + { + WALBERLA_MPI_SECTION() + { + if (isMPIInitialized_ && !currentlyAborting_) + { + [[maybe_unused]] auto worked = std::get< 1 >(customMPIOperations_.try_emplace(op, fct)); + WALBERLA_ASSERT(worked, "Emplacement of operation " << typeid(op).name() << " of type " + << typeid(CType).name() << " did not work."); + } + else { WALBERLA_ABORT("MPI must be initialized before an new MPI_Op can be committed."); } + } + WALBERLA_NON_MPI_SECTION() + { + WALBERLA_ABORT( "This should not be called, if waLBerla is compiled without MPI." ); + } + } //@} //******************************************************************************************************************* @@ -163,6 +249,33 @@ private: // Singleton MPIManager() : comm_(MPI_COMM_NULL) { WALBERLA_NON_MPI_SECTION() { rank_ = 0; } } +/// It is possible to commit own datatypes to MPI, that are not part of the standard. One example would be float16. +/// With these maps, it is possible to track self defined MPI_Datatypes and MPI_Ops, to access them at any time and +/// place in the program, also, they are automatically freed once MPIManager::finalizeMPI is called. +/// To initialize types or operations and add them to the map, the getter functions 'commitCustomType' and +/// 'commitCustomOperation' should be used. This can for example be done e.g. in the specialization of the MPITrait of +/// the newly defined type. For an example see MPIWrapper.cpp + std::map< std::type_index, walberla::mpi::Datatype > customMPITypes_{}; + std::map< walberla::mpi::Operation, walberla::mpi::MPIOperation > customMPIOperations_{}; + // FIXME this must be type specific as well, but doing so is a bit more complicated. + // 1. Idea) Combining both maps together e.g. 
as std::map< typeid(CType), + // std::pair< MPI_DataType, + // std::map< Operation, + // MPI_Op > > > customMPITypesWithOps_{}; + // There the access is quite nasty to look at, but easily doable, the construction however is quite difficult + // also one needs to make sure that the type is initialized before the operation. + // 2. Idea) Leaving it as two maps customMPITypes_ and customMPIOperations, + // but storing a pair of typeid and operation as key for the operation map. + // This way everything would look nice, but someone needs to implement a comparison operator for this pair. + // I personally don't know where to put this comparison operator to, since it should not be part of the manager. + // 3. Idea) Since this relies on the use of MPITrait<CType> --> { MPI_Datatype, MPI_Op } someone could define a object + // to store in the MPIManager there, to keep the MPIManager light and easily understandable. + // I'm also not sure if the MPITrait is the right spot for this though. + // For more information about the changes done in the code to allow custom defined types and operations, + // check out MR !647 ( https://i10git.cs.fau.de/walberla/walberla/-/merge_requests/647 ) + + + }; // class MPIManager diff --git a/src/core/mpi/MPIOperation.h b/src/core/mpi/MPIOperation.h new file mode 100644 index 0000000000000000000000000000000000000000..3da98bfe8ea2bdb8783b02e997cdee486bb46a0d --- /dev/null +++ b/src/core/mpi/MPIOperation.h @@ -0,0 +1,64 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. 
+// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file MPIOperation.h +//! \ingroup core +//! \author Michael Zikeli <michael.zikeli@fau.de> +// +//====================================================================================================================== + +#pragma once + +#include "core/Abort.h" +#include "core/mpi/MPIWrapper.h" + +namespace walberla::mpi{ + +//******************************************************************************************************************* +/*! RAII class for MPI operators that commits and frees them +* +*/ +//******************************************************************************************************************* +class MPIOperation +{ + public: + MPIOperation() = delete; + + explicit MPIOperation( MPI_User_function* fct ) : mpiOperation_( MPI_OP_NULL ) + { + WALBERLA_MPI_SECTION() { + if ( MPI_Op_create( fct, true, &mpiOperation_) != MPI_SUCCESS ) + { + WALBERLA_ABORT("MPI_Op_create for " << typeid(mpiOperation_).name() << " failed." 
); + } + } // WALBERLA_MPI_SECTION + } + + ~MPIOperation() + { + WALBERLA_MPI_SECTION() { MPI_Op_free( & mpiOperation_ ); } + } + + operator MPI_Op() const + { + return mpiOperation_; + } + + protected: + MPI_Op mpiOperation_; +}; + + +} // namespace walberla::mpi \ No newline at end of file diff --git a/src/core/mpi/MPIWrapper.cpp b/src/core/mpi/MPIWrapper.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9d68668676c932de2c564dd37312ca3645104e45 --- /dev/null +++ b/src/core/mpi/MPIWrapper.cpp @@ -0,0 +1,142 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file MPIWrapper.cpp +//! \ingroup core +//! \author Michael Zikeli <michael.zikeli@fau.de> +// +//====================================================================================================================== + +#include "MPIWrapper.h" + +#include <set> + +#include "MPIManager.h" + +#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT +namespace walberla +{ + +namespace mpi +{ +namespace +{ +/// These functions can then be used by self defined mpi operations, e.g. by using CustomMPIOperation.
+using float16_t = walberla::float16; +// The signature of MPI_User_function looks like this +// typedef void MPI_User_function( void *invec, void *inoutvec, int *len, MPI_Datatype *datatype); + +void sum(void* mpiRHSArray, void* mpiLHSArray, int* len, MPI_Datatype*) +{ + // cast mpi type to target c++ type + auto* rhs = (float16_t*) mpiRHSArray; + auto* lhs = (float16_t*) mpiLHSArray; + for (int i = 0; i < *len; ++i) + { + *lhs += *rhs; + } +} + +void min(void* mpiRHSArray, void* mpiLHSArray, int* len, MPI_Datatype*) +{ + // cast mpi type to target c++ type + auto* rhs = (float16_t*) mpiRHSArray; + auto* lhs = (float16_t*) mpiLHSArray; + for (int i = 0; i < *len; ++i) + { + *lhs = (*rhs >= *lhs) ? *lhs : *rhs; + } +} + +void max(void* mpiRHSArray, void* mpiLHSArray, int* len, MPI_Datatype*) +{ + // cast mpi type to target c++ type + auto* rhs = (float16_t*) mpiRHSArray; + auto* lhs = (float16_t*) mpiLHSArray; + for (int i = 0; i < *len; ++i) + { + *lhs = (*rhs <= *lhs) ? *lhs : *rhs; + } +} + +MPI_User_function* returnMPIUserFctPointer(const Operation op) +{ + switch (op) + { + case SUM: + return ∑ + case MIN: + return &min; + case MAX: + return &max; + default: + WALBERLA_ABORT("The chosen operation " << typeid(op).name() << " is not implemented for float16 yet."); + } +} + +} +} + +/// Here some MPI_Datatypes and MPI_Ops are initialized that are not part of the MPI Standard and therefore have to be +/// define yourself. This is done in the MPIManager, since they need to be freed before MPIFinalize is called and this +/// way it is easiest to keep track of them. +/// For more information about this feature compare MR !647 ( +/// https://i10git.cs.fau.de/walberla/walberla/-/merge_requests/647 ) + +/*! + * \brief Specialization of MPITrait for float16 + * + * The initialization of the self defined MPI_Datatype and MPI_Op is done in the MPIManager so that it can be freed + * before MPI is finalized. 
+ */ +MPI_Datatype MPITrait< walberla::float16 >::type() +{ + +#ifdef WALBERLA_BUILD_WITH_MPI + // Since this type should be created only once, a static variable is used as safeguard. + static bool initializedType = false; + if (!initializedType) + { + // Since float16 consists of two Bytes, a continuous datatype with size of two byte is created. + mpi::MPIManager::instance()->commitCustomType< walberla::float16, const int >(2); + initializedType = true; + } + return mpi::MPIManager::instance()->getCustomType< walberla::float16 >(); +#else + return mpistubs::MPI_FLOAT16; +#endif +} + +MPI_Op MPITrait< walberla::float16 >::operation(const mpi::Operation& op) +{ + WALBERLA_MPI_SECTION() + { + // mpi::Operation is an enum and not an enum class, thus, it is not sufficient to make a just a bool variable as + // safeguard, since all operations are of type mpi::Operation and only the first one would pass the safeguard. + // Therefore, a set is created and each operation that is called the first time, will be initialized. 
+ static std::set< mpi::Operation > operationInitializationRegister; + const bool needsInitialization = std::get< 1 >(operationInitializationRegister.emplace(op)); + if (needsInitialization) + { + mpi::MPIManager::instance()->commitCustomOperation< walberla::float16 >( + op, mpi::returnMPIUserFctPointer(op)); + } + return MPIManager::instance()->getCustomOperation< walberla::float16 >(op); + } + WALBERLA_NON_MPI_SECTION() { WALBERLA_ABORT("If MPI is not used, a custom operator should never be called."); } +} + +} // namespace walberla +#endif // WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT diff --git a/src/core/mpi/MPIWrapper.h b/src/core/mpi/MPIWrapper.h index eecee3136c702a61e7987d25705076a7c780a635..baf0c62d8ba65bd7bd91a665d5d5261e8323d778 100644 --- a/src/core/mpi/MPIWrapper.h +++ b/src/core/mpi/MPIWrapper.h @@ -23,8 +23,7 @@ #pragma once #include "core/Abort.h" - - +#include "core/mpi/Operation.h" /// \cond internal @@ -47,6 +46,7 @@ #endif + namespace walberla { namespace mpistubs { //empty namespace which can be used @@ -77,8 +77,6 @@ namespace mpistubs { #pragma warning ( pop ) #endif - - #else // WALBERLA_BUILD_WITH_MPI @@ -143,6 +141,10 @@ const MPI_Datatype MPI_UNSIGNED_LONG_LONG = 10; const MPI_Datatype MPI_FLOAT = 11; const MPI_Datatype MPI_DOUBLE = 12; const MPI_Datatype MPI_LONG_DOUBLE = 13; +#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT + const MPI_Datatype MPI_FLOAT16 = 14; +#endif + const int MPI_ORDER_C = 0; const int MPI_ORDER_FORTRAN = 0; @@ -151,16 +153,17 @@ const MPI_Datatype MPI_ANY_SOURCE = -2; const MPI_Datatype MPI_ANY_TAG = -1; const MPI_Datatype MPI_DATATYPE_NULL = 0; -const MPI_Op MPI_MIN = 100; -const MPI_Op MPI_MAX = 101; -const MPI_Op MPI_SUM = 102; -const MPI_Op MPI_PROD = 103; -const MPI_Op MPI_LAND = 104; -const MPI_Op MPI_BAND = 105; -const MPI_Op MPI_LOR = 106; -const MPI_Op MPI_BOR = 107; -const MPI_Op MPI_LXOR = 108; -const MPI_Op MPI_BXOR = 109; +const MPI_Op MPI_OP_NULL = 99; +const MPI_Op MPI_MIN = 100; +const MPI_Op 
MPI_MAX = 101; +const MPI_Op MPI_SUM = 102; +const MPI_Op MPI_PROD = 103; +const MPI_Op MPI_LAND = 104; +const MPI_Op MPI_BAND = 105; +const MPI_Op MPI_LOR = 106; +const MPI_Op MPI_BOR = 107; +const MPI_Op MPI_LXOR = 108; +const MPI_Op MPI_BXOR = 109; const int MPI_PACKED = 1; const int MPI_UNDEFINED = -1; @@ -265,6 +268,7 @@ inline int MPI_Type_get_extent(MPI_Datatype, MPI_Aint*, MPI_Aint*) { WALBERLA_MP inline int MPI_Type_create_struct(int, const int[], const MPI_Aint[], const MPI_Datatype[], MPI_Datatype*) { WALBERLA_MPI_FUNCTION_ERROR } inline int MPI_Op_create(MPI_User_function*, int, MPI_Op*) { WALBERLA_MPI_FUNCTION_ERROR } +inline int MPI_Op_free(MPI_Op*) { WALBERLA_MPI_FUNCTION_ERROR } inline int MPI_Get_processor_name( char*, int* ) { WALBERLA_MPI_FUNCTION_ERROR } @@ -307,58 +311,104 @@ namespace mpi { + + /*!\class MPITrait // \brief Base template for the MPITrait class // // The MPITrait class template offers a translation between the C++ built-in data types and -// the corresponding MPI data types required for send and receive operations. For a particular -// MPITrait instantiation, the corresponding MPI data type can be obtained by calling type() -// of the MPITrait. The following example demonstrates the application of the MPITrait class: +// the corresponding MPI data types its respective operation required for send, receive and reduce operations. +// For a particular MPITrait instantiation, the corresponding MPI data type can be obtained by calling type() +// as well as calling operation( const Operation& ) to the MPI operation corresponding to the MPI data type. +// The following example demonstrates the application of the MPITrait class: - \code +\code // Initialization of the MPI communication int* pi; // Integer buffer for the MPI send operation - ... // Initialization of the send buffer +... 
// Initialization of the send buffer // Sending 50 integers to process 0 MPI_Send( pi, 50, MPITrait< int >::type(), 0, 0, MPI_COMM_WORLD ); - \endcode -*/ +\endcode + */ template< typename T > -struct MPITrait; - - - -/// Macro for the creation of MPITrait specializations for the supported data types. +struct MPITrait +{ + static inline MPI_Datatype type(); + static inline MPI_Op operation(const mpi::Operation& ); +}; -#define WALBERLA_CREATE_MPITRAIT_SPECIALIZATION(CPP_TYPE,MPI_TYPE) \ +/// Macro for specialization of the MPI supported data types in MPITrait::type(). +#define WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(CPP_TYPE, MPI_TYPE) \ template<> \ - struct MPITrait< CPP_TYPE > \ + inline MPI_Datatype MPITrait< CPP_TYPE >::type() \ { \ - static inline MPI_Datatype type() { return (MPI_TYPE); } \ + return (MPI_TYPE); \ } - - // MPITRAIT SPECIALIZATIONS -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( char , MPI_CHAR ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( signed char , MPI_CHAR ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( signed short int , MPI_SHORT ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( signed int , MPI_INT ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( signed long int , MPI_LONG ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( signed long long , MPI_LONG_LONG ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( unsigned char , MPI_UNSIGNED_CHAR ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( unsigned short int , MPI_UNSIGNED_SHORT ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( unsigned int , MPI_UNSIGNED ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( unsigned long int , MPI_UNSIGNED_LONG ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( unsigned long long , MPI_UNSIGNED_LONG_LONG ); +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(char, MPI_CHAR) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(signed char, MPI_CHAR) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(signed short int, MPI_SHORT) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(signed int, MPI_INT) 
+WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(signed long int, MPI_LONG) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(signed long long, MPI_LONG_LONG) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(unsigned char, MPI_UNSIGNED_CHAR) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(unsigned short int, MPI_UNSIGNED_SHORT) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(unsigned int, MPI_UNSIGNED) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(unsigned long int, MPI_UNSIGNED_LONG) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(unsigned long long, MPI_UNSIGNED_LONG_LONG) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(float, MPI_FLOAT) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(double, MPI_DOUBLE) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(long double, MPI_LONG_DOUBLE) #ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT - WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( walberla::float16 , MPI_WCHAR ); +template<> struct MPITrait< float16 >{ + static MPI_Datatype type(); + static MPI_Op operation(const mpi::Operation& ); +}; +#endif + + +/*! + * \brief Specialization of the static operation() method of MPITrait. + * + * It chooses a MPI_Op depending on the value type of the object the operation is performed on. + * + * \param op The operation to be performed (op is an element of the enum mpi::Operation). 
+ */ +template< typename T > +MPI_Op MPITrait< T >::operation(const mpi::Operation& op) +{ + switch (op) + { + case mpi::MIN: + return MPI_MIN; + case mpi::MAX: + return MPI_MAX; + case mpi::SUM: + return MPI_SUM; + case mpi::PRODUCT: + return MPI_PROD; + case mpi::LOGICAL_AND: + return MPI_LAND; + case mpi::BITWISE_AND: + return MPI_BAND; + case mpi::LOGICAL_OR: + return MPI_LOR; + case mpi::BITWISE_OR: + return MPI_BOR; + case mpi::LOGICAL_XOR: + return MPI_LXOR; + case mpi::BITWISE_XOR: + return MPI_BXOR; + default: + WALBERLA_ABORT("Unknown operation!"); + } +#ifdef __IBMCPP__ + return MPI_SUM; // never reached, helps to suppress a warning from the IBM compiler #endif -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( float , MPI_FLOAT ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( double , MPI_DOUBLE ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( long double , MPI_LONG_DOUBLE ); +} } // namespace walberla /// \endcond diff --git a/src/core/mpi/Operation.h b/src/core/mpi/Operation.h new file mode 100644 index 0000000000000000000000000000000000000000..f3097bb89fe4f3fcb3f4581d8435afe2b2b70ec8 --- /dev/null +++ b/src/core/mpi/Operation.h @@ -0,0 +1,27 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! 
\file Operation.h +//! \ingroup core +//! \author Michael Zikeli <michael.zikeli@fau.de> +// +//====================================================================================================================== +#pragma once + +namespace walberla::mpi +{ +// Note: I don't like at all that this is an enum and not an enum class, but changing this would be a major change in the framework. +enum Operation { MIN, MAX, SUM, PRODUCT, LOGICAL_AND, BITWISE_AND, LOGICAL_OR, BITWISE_OR, LOGICAL_XOR, BITWISE_XOR }; +} // namespace walberla::mpi \ No newline at end of file diff --git a/src/core/mpi/Reduce.h b/src/core/mpi/Reduce.h index 5e9bb8220112ff4bff9c19a02e66cb3c2d801d46..a0b6edb39cad16fdfa5c527f83f9fe007ba9fa2c 100644 --- a/src/core/mpi/Reduce.h +++ b/src/core/mpi/Reduce.h @@ -16,18 +16,19 @@ //! \file Reduce.h //! \ingroup core //! \author Christian Godenschwager <christian.godenschwager@fau.de> +//! \author Michael Zikeli <michael.zikeli@fau.de> // //====================================================================================================================== #pragma once -#include "BufferDataTypeExtensions.h" - #include "core/Abort.h" #include "core/DataTypes.h" #include "core/debug/Debug.h" -#include "core/mpi/MPIManager.h" #include "core/mpi/MPIWrapper.h" +#include "core/mpi/Operation.h" + +#include "BufferDataTypeExtensions.h" #include "core/math/Vector3.h" @@ -36,33 +37,10 @@ namespace walberla { -namespace mpi { - -enum Operation { MIN, MAX, SUM, PRODUCT, LOGICAL_AND, BITWISE_AND, LOGICAL_OR, BITWISE_OR, LOGICAL_XOR, BITWISE_XOR }; - -inline MPI_Op toMPI_Op( Operation operation ) +namespace mpi { - switch( operation ) - { - case MIN: return MPI_MIN; - case MAX: return MPI_MAX; - case SUM: return MPI_SUM; - case PRODUCT: return MPI_PROD; - case LOGICAL_AND: return MPI_LAND; - case BITWISE_AND: return MPI_BAND; - case LOGICAL_OR: return MPI_LOR; - case BITWISE_OR: return MPI_BOR; - case LOGICAL_XOR: return MPI_LXOR; - case BITWISE_XOR: return MPI_BXOR; 
- default: WALBERLA_ABORT( "Unknown operation!" ); - } -#ifdef __IBMCPP__ - return MPI_SUM; // never reached, helps to suppress a warning from the IBM compiler -#endif -} - //====================================================================================================================== /*! * \brief Reduces a value over all processes in-place @@ -91,11 +69,11 @@ void reduceInplace( T & value, Operation operation, int recvRank = 0, MPI_Comm c if( myRank == recvRank ) { - MPI_Reduce( MPI_IN_PLACE, &value, 1, MPITrait<T>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( MPI_IN_PLACE, &value, 1, MPITrait<T>::type(), MPITrait<T>::operation(operation), recvRank, comm ); } else { - MPI_Reduce( &value, nullptr, 1, MPITrait<T>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( &value, nullptr, 1, MPITrait<T>::type(), MPITrait<T>::operation(operation), recvRank, comm ); } } @@ -128,11 +106,11 @@ inline void reduceInplace( bool & value, Operation operation, int recvRank = 0, if( myRank == recvRank ) { - MPI_Reduce( MPI_IN_PLACE, &intValue, 1, MPITrait<int>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( MPI_IN_PLACE, &intValue, 1, MPITrait<int>::type(), MPITrait<int>::operation(operation), recvRank, comm ); } else { - MPI_Reduce( &intValue, nullptr, 1, MPITrait<int>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( &intValue, nullptr, 1, MPITrait<int>::type(), MPITrait<int>::operation(operation), recvRank, comm ); } value = intValue != 0; @@ -172,11 +150,11 @@ T reduce( const T value, Operation operation, int recvRank = 0, MPI_Comm comm = if( myRank == recvRank ) { - MPI_Reduce( const_cast<T*>( &value ), &result, 1, MPITrait<T>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( const_cast<T*>( &value ), &result, 1, MPITrait<T>::type(), MPITrait<T>::operation(operation), recvRank, comm ); } else { - MPI_Reduce( const_cast<T*>( &value ), nullptr, 1, MPITrait<T>::type(), toMPI_Op(operation), recvRank, comm ); + 
MPI_Reduce( const_cast<T*>( &value ), nullptr, 1, MPITrait<T>::type(), MPITrait<T>::operation(operation), recvRank, comm ); } return result; @@ -213,11 +191,11 @@ inline bool reduce( const bool value, Operation operation, int recvRank = 0, MPI if( myRank == recvRank ) { - MPI_Reduce( &intValue, &intResult, 1, MPITrait<int>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( &intValue, &intResult, 1, MPITrait<int>::type(), MPITrait<int>::operation(operation), recvRank, comm ); } else { - MPI_Reduce( &intValue, nullptr, 1, MPITrait<int>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( &intValue, nullptr, 1, MPITrait<int>::type(), MPITrait<int>::operation(operation), recvRank, comm ); } return intResult != 0; @@ -252,11 +230,11 @@ void reduceInplace( std::vector<T> & values, Operation operation, int recvRank = if( myRank == recvRank ) { - MPI_Reduce( MPI_IN_PLACE, values.empty() ? nullptr : &values[0], int_c( values.size() ), MPITrait<T>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( MPI_IN_PLACE, values.empty() ? nullptr : &values[0], int_c( values.size() ), MPITrait<T>::type(), MPITrait<T>::operation(operation), recvRank, comm ); } else { - MPI_Reduce( values.empty() ? nullptr : &values[0], nullptr, int_c( values.size() ), MPITrait<T>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( values.empty() ? nullptr : &values[0], nullptr, int_c( values.size() ), MPITrait<T>::type(), MPITrait<T>::operation(operation), recvRank, comm ); } } @@ -292,14 +270,14 @@ inline void reduceInplace( std::vector<bool> & values, Operation operation, int if( myRank == recvRank ) { - MPI_Reduce( MPI_IN_PLACE, sendBuffer.empty() ? nullptr : &sendBuffer[0], int_c( sendBuffer.size() ), MPITrait<uint8_t>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( MPI_IN_PLACE, sendBuffer.empty() ? 
nullptr : &sendBuffer[0], int_c( sendBuffer.size() ), MPITrait<uint8_t>::type(), MPITrait<uint8_t>::operation(operation), recvRank, comm ); size_t size = values.size(); convert( sendBuffer, values ); values.resize(size); } else { - MPI_Reduce( sendBuffer.empty() ? nullptr : &sendBuffer[0], nullptr, int_c( sendBuffer.size() ), MPITrait<uint8_t>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( sendBuffer.empty() ? nullptr : &sendBuffer[0], nullptr, int_c( sendBuffer.size() ), MPITrait<uint8_t>::type(), MPITrait<uint8_t>::operation(operation), recvRank, comm ); } } @@ -331,11 +309,11 @@ void reduceInplace( math::Vector3<T> & values, Operation operation, int recvRank if( myRank == recvRank ) { - MPI_Reduce( MPI_IN_PLACE, values.data(), 3, MPITrait<T>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( MPI_IN_PLACE, values.data(), 3, MPITrait<T>::type(), MPITrait<T>::operation(operation), recvRank, comm ); } else { - MPI_Reduce( values.data(), nullptr, 3, MPITrait<T>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( values.data(), nullptr, 3, MPITrait<T>::type(), MPITrait<T>::operation(operation), recvRank, comm ); } } @@ -367,11 +345,11 @@ inline void reduceInplace( math::Vector3<bool> & values, Operation operation, in if( myRank == recvRank ) { - MPI_Reduce( MPI_IN_PLACE, intValues.data(), 3, MPITrait<int>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( MPI_IN_PLACE, intValues.data(), 3, MPITrait<int>::type(), MPITrait<int>::operation(operation), recvRank, comm ); } else { - MPI_Reduce( intValues.data(), nullptr, 3, MPITrait<int>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( intValues.data(), nullptr, 3, MPITrait<int>::type(), MPITrait<int>::operation(operation), recvRank, comm ); } for(uint_t i = 0; i < 3; ++i) @@ -411,11 +389,11 @@ math::Vector3<T> reduce( const math::Vector3<T> & values, Operation operation, i if( myRank == recvRank ) { - MPI_Reduce( const_cast<T*>( values.data() ), result.data(), 3, 
MPITrait<T>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( const_cast<T*>( values.data() ), result.data(), 3, MPITrait<T>::type(), MPITrait<T>::operation(operation), recvRank, comm ); } else { - MPI_Reduce( const_cast<T*>( values.data() ), nullptr, 3, MPITrait<T>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( const_cast<T*>( values.data() ), nullptr, 3, MPITrait<T>::type(), MPITrait<T>::operation(operation), recvRank, comm ); } return result; @@ -452,14 +430,14 @@ inline math::Vector3<bool> reduce( const math::Vector3<bool> & values, Operation if( myRank == recvRank ) { - MPI_Reduce( MPI_IN_PLACE, intValues.data(), 3, MPITrait<int>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( MPI_IN_PLACE, intValues.data(), 3, MPITrait<int>::type(), MPITrait<int>::operation(operation), recvRank, comm ); for(uint_t i = 0; i < 3; ++i) results[i] = intValues[i] != 0; } else { - MPI_Reduce( intValues.data(), nullptr, 3, MPITrait<int>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( intValues.data(), nullptr, 3, MPITrait<int>::type(), MPITrait<int>::operation(operation), recvRank, comm ); } return results; @@ -487,7 +465,7 @@ T allReduce( const T & value, Operation operation, MPI_Comm comm = MPI_COMM_WORL WALBERLA_NON_MPI_SECTION() { return value; } T result; - MPI_Allreduce( const_cast<T*>( &value ), &result, 1, MPITrait<T>::type(), toMPI_Op(operation), comm ); + MPI_Allreduce( const_cast<T*>( &value ), &result, 1, MPITrait<T>::type(), MPITrait<T>::operation(operation), comm ); return result; } @@ -514,7 +492,7 @@ inline bool allReduce( const bool value, Operation operation, MPI_Comm comm = MP int intValue = value ? 
1 : 0; - MPI_Allreduce( MPI_IN_PLACE, &intValue, 1, MPITrait<int>::type(), toMPI_Op(operation), comm ); + MPI_Allreduce( MPI_IN_PLACE, &intValue, 1, MPITrait<int>::type(), MPITrait<int>::operation(operation), comm ); return intValue != 0; } @@ -539,7 +517,7 @@ void allReduceInplace( T & value, Operation operation, MPI_Comm comm = MPI_COMM_ WALBERLA_NON_MPI_SECTION() { return; } - MPI_Allreduce( MPI_IN_PLACE, &value, 1, MPITrait<T>::type(), toMPI_Op(operation), comm ); + MPI_Allreduce( MPI_IN_PLACE, &value, 1, MPITrait<T>::type(), MPITrait<T>::operation(operation), comm ); } @@ -562,7 +540,7 @@ inline void allReduceInplace( bool & value, Operation operation, MPI_Comm comm = int intValue = value ? 1 : 0; - MPI_Allreduce( MPI_IN_PLACE, &intValue, 1, MPITrait<int>::type(), toMPI_Op(operation), comm ); + MPI_Allreduce( MPI_IN_PLACE, &intValue, 1, MPITrait<int>::type(), MPITrait<int>::operation(operation), comm ); value = intValue != 0; } @@ -587,7 +565,7 @@ void allReduceInplace( std::vector<T> & values, Operation operation, MPI_Comm co WALBERLA_NON_MPI_SECTION() { return; } - MPI_Allreduce( MPI_IN_PLACE, values.empty() ? nullptr : &values[0], int_c( values.size() ), MPITrait<T>::type(), toMPI_Op(operation), comm ); + MPI_Allreduce( MPI_IN_PLACE, values.empty() ? nullptr : &values[0], int_c( values.size() ), MPITrait<T>::type(), MPITrait<T>::operation(operation), comm ); } @@ -612,7 +590,7 @@ inline void allReduceInplace( std::vector<bool> & bools, Operation operation, MP std::vector<uint8_t> sendBuffer; convert( bools, sendBuffer ); - MPI_Allreduce( MPI_IN_PLACE, sendBuffer.empty() ? nullptr : &sendBuffer[0], int_c( sendBuffer.size() ), MPITrait<uint8_t>::type(), toMPI_Op(operation), comm ); + MPI_Allreduce( MPI_IN_PLACE, sendBuffer.empty() ? 
nullptr : &sendBuffer[0], int_c( sendBuffer.size() ), MPITrait<uint8_t>::type(), MPITrait<uint8_t>::operation(operation), comm ); auto size = bools.size(); convert(sendBuffer, bools); bools.resize(size); @@ -637,7 +615,7 @@ void allReduceInplace( math::Vector3<T> & values, Operation operation, MPI_Comm WALBERLA_NON_MPI_SECTION() { return; } - MPI_Allreduce( MPI_IN_PLACE, values.data(), 3, MPITrait<T>::type(), toMPI_Op(operation), comm ); + MPI_Allreduce( MPI_IN_PLACE, values.data(), 3, MPITrait<T>::type(), MPITrait<T>::operation(operation), comm ); } @@ -663,7 +641,7 @@ inline void allReduceInplace( math::Vector3<bool> & bools, Operation operation, math::Vector3<int> intValues{bools[0] ? 1 : 0, bools[1] ? 1 : 0, bools[2] ? 1 : 0}; - MPI_Allreduce( MPI_IN_PLACE, intValues.data(), 3, MPITrait<int>::type(), toMPI_Op(operation), comm ); + MPI_Allreduce( MPI_IN_PLACE, intValues.data(), 3, MPITrait<int>::type(), MPITrait<int>::operation(operation), comm ); for(uint_t i = 0; i < 3; ++i) { diff --git a/src/core/timing/Timer.h b/src/core/timing/Timer.h index 9f7c3f97d1066ff7ffa322cb5d6550c9f5d5013b..efc7016409a6e91d8c83b96aebfbd9fc40c4a8cf 100644 --- a/src/core/timing/Timer.h +++ b/src/core/timing/Timer.h @@ -30,6 +30,7 @@ #include "WcPolicy.h" #include "core/DataTypes.h" +#include "core/mpi/MPIManager.h" #include "core/mpi/RecvBuffer.h" #include "core/mpi/Reduce.h" #include "core/mpi/SendBuffer.h" @@ -527,7 +528,7 @@ shared_ptr<Timer<TP> > getReduced( Timer<TP>& timer, ReduceType rt, int targetRa } //uint_t counter, double min, double max, double total, double sumOfSquares - if ( targetRank < 0 || targetRank == MPIManager::instance()->worldRank() ) + if ( targetRank < 0 || targetRank == mpi::MPIManager::instance()->worldRank() ) return make_shared<Timer<TP> >( mpi::MPIManager::instance()->numProcesses(), min, max, total, sumOfSquares ); return nullptr; diff --git a/src/field/distributors/KernelDistributor.h b/src/field/distributors/KernelDistributor.h index 
712d9a7b7e32ae96b0d259873d102166675b22b3..bc0abb8fa5882e51960d936132cd3613d55f5172 100644 --- a/src/field/distributors/KernelDistributor.h +++ b/src/field/distributors/KernelDistributor.h @@ -62,7 +62,7 @@ public: WALBERLA_ASSERT(baseField.nrOfGhostLayers() > uint_t(0), "field for kernel distribution needs at least one ghost layer"); } - inline bool operator==( const OwnType & other ){ return baseField_ == other.baseField_; } + inline bool operator==( const OwnType & other ) const { return baseField_ == other.baseField_; } template< typename ForwardIterator_T > inline void distribute( const Vector3<real_t> & position, ForwardIterator_T distributeValueBegin ) diff --git a/src/field/distributors/NearestNeighborDistributor.h b/src/field/distributors/NearestNeighborDistributor.h index 932f443b3e30af9ab6a2bc4f6c6f3f585e485473..c4819cb9119d46e46dbc34bed40228664bb5cffb 100644 --- a/src/field/distributors/NearestNeighborDistributor.h +++ b/src/field/distributors/NearestNeighborDistributor.h @@ -59,7 +59,7 @@ public: : blockStorage_( blockStorage ), block_( block ), baseField_( baseField ), flagField_( flagField ), evaluationMask_( evaluationMask ) {} - inline bool operator==( const OwnType & other ){ return baseField_ == other.baseField_; } + inline bool operator==( const OwnType & other ) const { return baseField_ == other.baseField_; } template< typename ForwardIterator_T > inline void distribute( const Vector3<real_t> & position, ForwardIterator_T distributeValueBegin ) diff --git a/src/field/interpolators/KernelFieldInterpolator.h b/src/field/interpolators/KernelFieldInterpolator.h index 0e59fabf21dfd4899e62ca29961811a0e99e59d9..0f5987e76e844df9505869fbaab3feadacbc88fa 100644 --- a/src/field/interpolators/KernelFieldInterpolator.h +++ b/src/field/interpolators/KernelFieldInterpolator.h @@ -105,7 +105,7 @@ public: } - inline bool operator==( const OwnType & other ){ return baseField_ == other.baseField_; } + inline bool operator==( const OwnType & other ) const { return 
baseField_ == other.baseField_; } template< typename ForwardIterator_T > inline void get( const Vector3<real_t> & position, ForwardIterator_T interpolationResultBegin ) diff --git a/src/field/interpolators/NearestNeighborFieldInterpolator.h b/src/field/interpolators/NearestNeighborFieldInterpolator.h index b5b5cba7f65a3e06517356933d157108ba81e90c..bb08276f9f12949a10461e96b1a815acd64f2559 100644 --- a/src/field/interpolators/NearestNeighborFieldInterpolator.h +++ b/src/field/interpolators/NearestNeighborFieldInterpolator.h @@ -57,7 +57,7 @@ public: {} - inline bool operator==( const OwnType & other ){ return baseField_ == other.baseField_; } + inline bool operator==( const OwnType & other ) const { return baseField_ == other.baseField_; } template< typename ForwardIterator_T > inline void get( const Vector3<real_t> & position, ForwardIterator_T interpolationResultBegin ) diff --git a/src/field/interpolators/TrilinearFieldInterpolator.h b/src/field/interpolators/TrilinearFieldInterpolator.h index e9809d835f1bf67e89da8a0eb2b5a9493624c84e..351ed7afea09db1c75feeb59dd771afb0796949e 100644 --- a/src/field/interpolators/TrilinearFieldInterpolator.h +++ b/src/field/interpolators/TrilinearFieldInterpolator.h @@ -62,7 +62,7 @@ public: } - inline bool operator==( const OwnType & other ){ return baseField_ == other.baseField_; } + inline bool operator==( const OwnType & other ) const { return baseField_ == other.baseField_; } template< typename ForwardIterator_T > inline void get( const Vector3<real_t> & position, ForwardIterator_T interpolationResultBegin ) diff --git a/tests/core/CMakeLists.txt b/tests/core/CMakeLists.txt index 240fe56dc9195983d88f4930e7e4bcb049f009e5..604de6371b573b350638ea4fb4f003d93960cb7c 100644 --- a/tests/core/CMakeLists.txt +++ b/tests/core/CMakeLists.txt @@ -119,6 +119,11 @@ waLBerla_execute_test( NAME SetReductionTest4 COMMAND $<TARGET_FILE:SetReductio waLBerla_execute_test( NAME SetReductionTest5 COMMAND $<TARGET_FILE:SetReductionTest> PROCESSES 5 ) 
waLBerla_execute_test( NAME SetReductionTest27 COMMAND $<TARGET_FILE:SetReductionTest> PROCESSES 27 ) +if ( WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT ) + waLBerla_compile_test( NAME MPIFloat16Test FILES mpi/MPIFloat16Test.cpp DEPENDS core ) + target_compile_features( MPIFloat16Test PUBLIC cxx_std_23 ) + waLBerla_execute_test( NAME MPIFloat16Test4 COMMAND $<TARGET_FILE:MPIFloat16Test> PROCESSES 4) +endif () ############## diff --git a/tests/core/FP16Test.cpp b/tests/core/FP16Test.cpp index 60a2be0eeee0872449f6a648fa1c65abbbda7f42..e08dd55b099c8edc36f97ab4f86a613d6671d1ac 100644 --- a/tests/core/FP16Test.cpp +++ b/tests/core/FP16Test.cpp @@ -70,7 +70,7 @@ void fp16Test( int argc, char ** argv ) const float16 y = -1.8f16; const float64 z = -0.6; WALBERLA_LOG_INFO_ON_ROOT(" + " << (double) x << " + " << (double) y << " == " << (float64) (x + y) << " ? ") - WALBERLA_CHECK_FLOAT_EQUAL((float64) (x + y), z, "float16 addition does not work correctly."); + WALBERLA_CHECK_FLOAT_EQUAL( (x + y), (float16) z, "float16 addition does not work correctly."); #endif } diff --git a/tests/core/Float16SupportTest.cpp b/tests/core/Float16SupportTest.cpp index 04ea9378f54eee4ee78f81177fc609d732da21c5..5116886ff4e154cecb21465acc39e40caea776dc 100644 --- a/tests/core/Float16SupportTest.cpp +++ b/tests/core/Float16SupportTest.cpp @@ -19,14 +19,16 @@ // //====================================================================================================================== -#include <memory> -#include <numeric> - #include "core/DataTypes.h" #include "core/Environment.h" #include "core/logging/Logging.h" +#include <numeric> + namespace walberla::simple_Float16_test { + + +#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT using walberla::floatIsEqual; using walberla::real_t; using walberla::uint_c; @@ -90,12 +92,12 @@ void vector_test() fpDst_cast.assign( 10, (dst_t) 1.5 ); fp32.assign( 10, 1.5f ); std::copy( fpSrc.begin(), fpSrc.end(), fpDst.begin() ); - WALBERLA_LOG_WARNING_ON_ROOT( +
WALBERLA_LOG_INFO_ON_ROOT( " std::vector.assign is not able to assign " << typeid( src_t ).name() << " values to container of type " << precisionType << ".\n" << " Therefore, the floating point value for assign must be cast beforehand or std::copy must be used, since it uses a static_cast internally." ); - fpSrc[5] = 2.3; + fpSrc[5] = real_c(2.3); fpDst_cast[5] = (dst_t) 2.3; fp32[5] = 2.3f; fpDst[5] = (dst_t) 2.3; @@ -118,7 +120,7 @@ void vector_test() WALBERLA_CHECK_FLOAT_EQUAL( (dst_t)sumSrc, sumDst ); } { - fpSrc.assign( 13, 1.3 ); + fpSrc.assign(13, real_c(1.3)); std::copy( fpSrc.begin(), fpSrc.end(), fpDst.begin() ); const auto sumSrc = std::reduce(fpSrc.begin(), fpSrc.end()); const auto sumDst = std::reduce(fpDst.begin(), fpDst.end()); @@ -126,8 +128,11 @@ void vector_test() } } // simple_Float16_test::vector_test() +#endif // WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT + int main( int argc, char** argv ) { +#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT // This check only works since C++23 and is used in many implementations, so it's important, that it works. WALBERLA_CHECK( std::is_arithmetic< dst_t >::value ); @@ -149,15 +154,17 @@ int main( int argc, char** argv ) WALBERLA_LOG_INFO_ON_ROOT( " Start a where float32 is sufficient but float16 is not." ); WALBERLA_CHECK_FLOAT_UNEQUAL( dst_t(1.0)-dst_t(0.3), 1.0-0.3 ); WALBERLA_CHECK_FLOAT_EQUAL( 1.0f-0.3f, 1.0-0.3 ); +#else + WALBERLA_LOG_WARNING_ON_ROOT( "\nWALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT is not enabled. 
So this test cannot be run!\n" ) +#endif // WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT - return 0; + return EXIT_SUCCESS; } // simple_Float16_test::main() } // namespace walberla::simple_Float16_test int main( int argc, char** argv ) { - walberla::simple_Float16_test::main( argc, argv ); + return walberla::simple_Float16_test::main( argc, argv ); - return EXIT_SUCCESS; } // main() diff --git a/tests/core/mpi/MPIFloat16Test.cpp b/tests/core/mpi/MPIFloat16Test.cpp new file mode 100644 index 0000000000000000000000000000000000000000..be0bc5aa4f86eacc22f223c165f1dd943d5dbd56 --- /dev/null +++ b/tests/core/mpi/MPIFloat16Test.cpp @@ -0,0 +1,162 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file MPIFloat16Test.cpp +//! \ingroup core +//! \author Michael Zikeli <michael.zikeli@fau.de> +//! \brief This test is to check whether the self defined MPI_Datatype and the self defined Operators are working. +//! To verify the type, some parts of the BufferSystemTest are just copied. +//! To verify the operations, a simple AllReduce is executed for each operation. +//! For now only { SUM, MIN, MAX } are implemented, thus only those are tested.
+// +//====================================================================================================================== + +#include "core/Abort.h" +#include "core/debug/TestSubsystem.h" +#include "core/logging/Logging.h" +#include "core/mpi/BufferSystem.h" +#include "core/mpi/Environment.h" +#include "core/mpi/Reduce.h" + + +namespace walberla::mpifloat16test +{ + +using mpi::BufferSystem; +using namespace std::literals::chrono_literals; + +#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT +void symmetricCommunication() +{ + const int MSG_SIZE = 10; + + auto mpiManager = MPIManager::instance(); + + const int numProcesses = mpiManager->numProcesses(); + const int rank = mpiManager->worldRank(); + const int leftNeighbor = (rank-1+numProcesses) % numProcesses; + const int rightNeighbor = (rank+1) % numProcesses; + + WALBERLA_CHECK_GREATER_EQUAL( numProcesses, 3 ); + + + BufferSystem bs ( MPI_COMM_WORLD ); + + // Pack Message to left neighbor containing own rank + for( int i=0; i< MSG_SIZE; ++i ) + bs.sendBuffer( leftNeighbor ) << numeric_cast<float16>(rank) + float16(0.3); + + // Pack Message to right neighbor containing own rank + for( int i=0; i< MSG_SIZE; ++i ) + bs.sendBuffer( rightNeighbor ) << numeric_cast<float16>(rank) - float16(0.3); + + bs.setReceiverInfoFromSendBufferState( true, false ); + bs.sendAll(); + + for( auto it = bs.begin(); it != bs.end(); ++it ) + { + WALBERLA_CHECK ( it.rank() == leftNeighbor || it.rank() == rightNeighbor ); + WALBERLA_CHECK_EQUAL( it.buffer().size(), MSG_SIZE * sizeof(float16) + MSG_SIZE * mpi::BUFFER_DEBUG_OVERHEAD ); + + auto receivedVal = float16(-1); + it.buffer() >> receivedVal; + + WALBERLA_CHECK_EQUAL( typeid(receivedVal), typeid(float16) ); + + if ( it.rank() == leftNeighbor ) + { + WALBERLA_CHECK_FLOAT_EQUAL( receivedVal, numeric_cast<float16>( it.rank() ) - float16(0.3) ); + WALBERLA_CHECK_FLOAT_UNEQUAL( receivedVal, numeric_cast<float16>( it.rank() ) + float16(0.3), 0.5); + } else { + 
WALBERLA_CHECK_FLOAT_EQUAL( receivedVal, numeric_cast<float16>( it.rank() ) + float16(0.3) ); + WALBERLA_CHECK_FLOAT_UNEQUAL( receivedVal, numeric_cast<float16>( it.rank() ) - float16(0.3), 0.5); + } + } + + WALBERLA_CHECK_EQUAL( bs.getBytesSent(), (MSG_SIZE * sizeof(float16) + MSG_SIZE * mpi::BUFFER_DEBUG_OVERHEAD) * 2 ); + WALBERLA_CHECK_EQUAL( bs.getBytesReceived(), (MSG_SIZE * sizeof(float16) + MSG_SIZE * mpi::BUFFER_DEBUG_OVERHEAD) * 2 ); +} + +void reduce( ) +{ + + auto mpiManager = MPIManager::instance(); + + const int numProcesses = mpiManager->numProcesses(); + const int rank = mpiManager->worldRank(); + + const auto startValue = numeric_cast<float16>(rank) + float16(0.3); + + // SUM + auto value = startValue; + + walberla::mpi::allReduceInplace( value, walberla::mpi::SUM ); + + auto sum = float16( 0.0 ); + for( int i = 0; i < numProcesses; ++i ) + sum += numeric_cast<float16>(i) + float16(0.3); + WALBERLA_CHECK_FLOAT_EQUAL( value, sum ); + WALBERLA_CHECK_FLOAT_UNEQUAL( value, ((numProcesses*(numProcesses-1)/2.)+0.3*numProcesses), 1e-10 ); + + // MIN + value = startValue; + + walberla::mpi::allReduceInplace( value, walberla::mpi::MIN ); + WALBERLA_CHECK_FLOAT_EQUAL( value, numeric_cast<float16>(0.3) ); + + // MAX + value = startValue; + + walberla::mpi::allReduceInplace( value, walberla::mpi::MAX ); + WALBERLA_CHECK_FLOAT_EQUAL( value, numeric_cast<float16>(numProcesses-1)+numeric_cast<float16>(0.3) ); + +} +#endif // WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT + +int main( int argc, char** argv ) +{ +#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT + mpi::Environment mpiEnv( argc, argv ); + debug::enterTestMode(); + walberla::logging::Logging::instance()->setLogLevel( walberla::logging::Logging::INFO ); + + auto mpiManager = MPIManager::instance(); + auto numProcesses = mpiManager->numProcesses(); + + if(numProcesses <= 2) + { + WALBERLA_ABORT("This test has to be executed on at least 3 processes. 
Executed on " << numProcesses); + return EXIT_FAILURE; + } + + WALBERLA_LOG_INFO_ON_ROOT("Testing Symmetric Communication..."); + symmetricCommunication(); + + WALBERLA_LOG_INFO_ON_ROOT("Testing Reduce operations..."); + reduce( ); +#else + WALBERLA_LOG_WARNING_ON_ROOT( "\nWALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT is not enabled. So this test cannot be run!\n" ) +#endif // WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT + + return EXIT_SUCCESS; + +} // mpifloat16test::main() + +} // namespace walberla::mpifloat16test + +int main( int argc, char** argv ) +{ + return walberla::mpifloat16test::main( argc, argv ); +} // main()