diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 27b8999837b423911c0e036580b9eeeca3e72fee..c4dc0ffe6292e737ea319162a0efd7106a1a32fb 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -51,6 +51,7 @@ stages: -DWALBERLA_BUILD_WITH_METIS=$WALBERLA_BUILD_WITH_METIS -DWALBERLA_BUILD_WITH_PARMETIS=$WALBERLA_BUILD_WITH_PARMETIS -DWALBERLA_BUILD_WITH_FFTW=$WALBERLA_BUILD_WITH_FFTW + -DWALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT=$WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT -DWALBERLA_ENABLE_GUI=$WALBERLA_ENABLE_GUI -DWALBERLA_BUILD_WITH_CODEGEN=$WALBERLA_BUILD_WITH_CODEGEN -DWALBERLA_STL_BOUNDS_CHECKS=$WALBERLA_STL_BOUNDS_CHECKS @@ -769,6 +770,7 @@ gcc_13_serial: WALBERLA_BUILD_WITH_PARMETIS: "OFF" WALBERLA_BUILD_WITH_CODEGEN: "ON" WALBERLA_BUILD_WITH_PYTHON: "ON" + WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT: "ON" only: variables: - $ENABLE_NIGHTLY_BUILDS @@ -791,6 +793,7 @@ gcc_13_mpionly: WALBERLA_BUILD_WITH_OPENMP: "OFF" WALBERLA_BUILD_WITH_CODEGEN: "ON" WALBERLA_BUILD_WITH_PYTHON: "ON" + WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT: "ON" only: variables: - $ENABLE_NIGHTLY_BUILDS @@ -813,6 +816,7 @@ gcc_13_hybrid: WALBERLA_BUILD_WITH_CUDA: "ON" WALBERLA_BUILD_WITH_CODEGEN: "ON" WALBERLA_BUILD_WITH_PYTHON: "ON" + WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT: "ON" tags: - cuda11 - docker @@ -835,6 +839,7 @@ gcc_13_serial_dbg: CMAKE_BUILD_TYPE: "DebugOptimized" WALBERLA_BUILD_WITH_CODEGEN: "ON" WALBERLA_BUILD_WITH_PYTHON: "ON" + WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT: "ON" tags: - cuda11 - docker @@ -855,6 +860,7 @@ gcc_13_mpionly_dbg: WALBERLA_BUILD_WITH_OPENMP: "OFF" WALBERLA_BUILD_WITH_CODEGEN: "ON" WALBERLA_BUILD_WITH_PYTHON: "ON" + WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT: "ON" tags: - cuda11 - docker @@ -874,6 +880,7 @@ gcc_13_hybrid_dbg: CMAKE_BUILD_TYPE: "DebugOptimized" WALBERLA_BUILD_WITH_CODEGEN: "ON" WALBERLA_BUILD_WITH_PYTHON: "ON" + WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT: "ON" tags: - cuda11 - docker @@ -896,6 +903,7 @@ gcc_13_hybrid_dbg_sp: 
WALBERLA_BUILD_WITH_METIS: "OFF" WALBERLA_BUILD_WITH_CODEGEN: "ON" WALBERLA_BUILD_WITH_PYTHON: "ON" + WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT: "ON" tags: - cuda11 - docker diff --git a/CMakeLists.txt b/CMakeLists.txt index 5eae2503b9bd414976b0cbfce5478d2af4ea4a54..1d21694af654ba0b996a8bbce0c3096ef6a125ab 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1276,12 +1276,6 @@ if ( WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT ) message( FATAL_ERROR "Compiler: ${CMAKE_CXX_COMPILER} Version: ${CMAKE_CXX_COMPILER_VERSION} does not support half precision" ) endif () - # Local host optimization - if ( NOT WALBERLA_OPTIMIZE_FOR_LOCALHOST ) - message( WARNING "[WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT] " - "You are not optimizing for localhost. You may encounter linker errors, or WORSE: silent incorrect fp16 arithmetic! Consider also enabling WALBERLA_OPTIMIZE_FOR_LOCALHOST!" ) - endif () # Local host check - endif () # Check if WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT is set ############################################################################################################################ diff --git a/src/core/DataTypes.h b/src/core/DataTypes.h index 4e7c019a86dcf0ce23fb7c8a9e66e6add8500bde..d6147c12be61a072f112e49de6e68001a6013abc 100644 --- a/src/core/DataTypes.h +++ b/src/core/DataTypes.h @@ -167,6 +167,7 @@ using real_t = double; using real_t = float; #endif +#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT /// Half precision support. Experimental. Use carefully. /// /// This feature is experimental, since it strictly depends on the underlying architecture and compiler support. @@ -174,7 +175,6 @@ using real_t = float; /// interchange. Arithmetic operations will likely involve casting to fp32 (C++ float) and truncation to fp16. /// Only bandwidth bound code may therefore benefit. None of this is guaranteed, and may change in the future. 
/// -#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT /// FIXME: (not really right) Clang version must be 15 or higher for x86 half precision support. /// FIXME: (not really right) GCC version must be 12 or higher for x86 half precision support. /// FIXME: (I don't know) Also support seems to require SSE, so ensure that respective instruction sets are enabled. @@ -202,7 +202,7 @@ using half = _Float16; // Another possible half precision format would be the one from Google Brain (bfloat16) with an 8 bit exponent and a 7 bit mantissa. // Compare https://i10git.cs.fau.de/ab04unyc/walberla/-/issues/23 using float16 = half; -#endif +#endif // WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT using float32 = float; using float64 = double; @@ -276,7 +276,7 @@ inline bool floatIsEqual( walberla::float16 lhs, walberla::float16 rhs, const wa const auto difference = lhs - rhs; return ( (difference < 0) ? -difference : difference ) < epsilon; } -#endif +#endif // WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT } // namespace walberla diff --git a/src/core/mpi/CMakeLists.txt b/src/core/mpi/CMakeLists.txt index 13cc3653bd41f03e1840abd09f71147eee3c57cd..2987e28c266bce6552659cce89f5ef9546927a76 100644 --- a/src/core/mpi/CMakeLists.txt +++ b/src/core/mpi/CMakeLists.txt @@ -19,11 +19,14 @@ target_sources( core MPIIO.h MPIManager.cpp MPIManager.h + MPIOperation.h MPITextFile.cpp MPITextFile.h + MPIWrapper.cpp MPIWrapper.h OpenMPBufferSystem.h OpenMPBufferSystem.impl.h + Operation.h RecvBuffer.h Reduce.h SendBuffer.h diff --git a/src/core/mpi/Datatype.h b/src/core/mpi/Datatype.h index f717cb6d94c661aec320a864c972dbba4a49d2ae..ac933bbe8c1e572520d0e67f75f8199c1adc8910 100644 --- a/src/core/mpi/Datatype.h +++ b/src/core/mpi/Datatype.h @@ -22,6 +22,7 @@ #pragma once #include "MPIWrapper.h" +#include "core/Abort.h" namespace walberla { @@ -43,6 +44,21 @@ namespace mpi { WALBERLA_MPI_SECTION() { MPI_Type_commit( &mpiDatatype_ ); } } + explicit Datatype(const uint_t byteSize) : 
mpiDatatype_(MPI_DATATYPE_NULL) + { + WALBERLA_MPI_SECTION() + { + if (MPI_Type_contiguous(int_c(byteSize), MPI_BYTE, &mpiDatatype_) != MPI_SUCCESS) + { + WALBERLA_ABORT("MPI_Type_contiguous " << typeid(mpiDatatype_).name() << " failed."); + } + if (MPI_Type_commit(&mpiDatatype_) != MPI_SUCCESS) + { + WALBERLA_ABORT("MPI_Type_commit " << typeid(mpiDatatype_).name() << " failed."); + } + } + } + void init( MPI_Datatype datatype ) { mpiDatatype_ = datatype; diff --git a/src/core/mpi/MPIManager.cpp b/src/core/mpi/MPIManager.cpp index a334bc16c4878cea58a1452cd083fba31d9d3c7e..c25ca1082277d89c8be486ae7a9c61350d6bfea0 100644 --- a/src/core/mpi/MPIManager.cpp +++ b/src/core/mpi/MPIManager.cpp @@ -119,6 +119,10 @@ void MPIManager::finalizeMPI() { WALBERLA_MPI_SECTION() { + /// Free the custom types and operators + customMPITypes_.clear(); + customMPIOperations_.clear(); + if (isMPIInitialized_ && !currentlyAborting_) { isMPIInitialized_ = false; diff --git a/src/core/mpi/MPIManager.h b/src/core/mpi/MPIManager.h index 9ba3fb4d04b8f6b0c7f1e2041454677b9156bf75..60ce4d8514e57bb2465a14dfa304cab73b3b8be6 100644 --- a/src/core/mpi/MPIManager.h +++ b/src/core/mpi/MPIManager.h @@ -18,23 +18,28 @@ //! \author Florian Schornbaum <florian.schornbaum@fau.de> //! \author Martin Bauer <martin.bauer@fau.de> //! \author Christian Godenschwager <christian.godenschwager@fau.de> +//! 
\author Michael Zikeli <michael.zikeli@fau.de> // //====================================================================================================================== #pragma once -#include "MPIWrapper.h" #include "core/DataTypes.h" #include "core/debug/Debug.h" #include "core/math/Uint.h" +#include "core/mpi/Datatype.h" +#include "core/mpi/MPIOperation.h" +#include "core/mpi/MPIWrapper.h" +#include "core/mpi/Operation.h" #include "core/singleton/Singleton.h" +#include <map> +#include <typeindex> namespace walberla { namespace mpi { - /** * Encapsulates MPI Rank/Communicator information * @@ -127,6 +132,87 @@ public: /// Indicates whether MPI-IO can be used with the current MPI communicator; certain versions of OpenMPI produce /// segmentation faults when using MPI-IO with a 3D Cartesian MPI communicator (see waLBerla issue #73) bool isCommMPIIOValid() const; + + /// Return the custom MPI_Datatype stored in 'customMPITypes_' and defined by the user and passed to 'commitCustomType'. + template< typename CType > + MPI_Datatype getCustomType() const + { + WALBERLA_MPI_SECTION() + { + return customMPITypes_.at(typeid(CType)).operator MPI_Datatype(); + } + WALBERLA_NON_MPI_SECTION() + { + WALBERLA_ABORT( "This should not be called, if waLBerla is compiled without MPI." ); + } + } + + /// Return the custom MPI_Op stored in 'customMPIOperation_' and defined by the user and passed to 'commitCustomOperation'. + template< typename CType > + MPI_Op getCustomOperation(mpi::Operation op) const + { + // FIXME the operation is actually type dependent but implementing this is not straightforward, + // compare comment at declaration of 'customMPIOperations_'. + WALBERLA_MPI_SECTION() + { + return customMPIOperations_.at(op).operator MPI_Op(); + } + WALBERLA_NON_MPI_SECTION() + { + WALBERLA_ABORT( "This should not be called, if waLBerla is compiled without MPI." 
); + } + } + //@} + //******************************************************************************************************************* + + //** Setter Functions ********************************************************************************************** + /*! \name Setter Function */ + //@{ + ///! \brief Initializes a custom MPI_Datatype and logs it in the customMPITypes_ map. + ///! \param argument The argument that is expected by the constructor of mpi::Datatype + /// At the point of creation 26.01.2024 this is either MPI_Datatype or const int. + template < typename CType, class ConstructorArgumentType > + void commitCustomType( ConstructorArgumentType& argument ) + { + WALBERLA_MPI_SECTION() + { + if (isMPIInitialized_ && !currentlyAborting_) + { + static_assert( std::is_same_v<ConstructorArgumentType, const int> || std::is_same_v<ConstructorArgumentType, MPI_Datatype>, + "mpi::Datatype has only an constructor for an int value or a MPI_Datatype." ); + [[maybe_unused]] auto worked = std::get< 1 >( customMPITypes_.try_emplace(typeid(CType), argument) ); + WALBERLA_ASSERT(worked, "Emplacement of type " << typeid(CType).name() << " did not work."); + } else { + WALBERLA_ABORT( "MPI must be initialized before an new MPI_Datatype can be committed." ); + } + } + WALBERLA_NON_MPI_SECTION() + { + WALBERLA_ABORT( "This should not be called, if waLBerla is compiled without MPI." ); + } + } + + ///! \brief Initializes a custom MPI_Op and logs it in the customMPIOperation map + ///! \param op A operator, e.g. SUM, MIN. + ///! \param fct The definition of the MPI_User_function used for this operator. 
+ template< typename CType > + void commitCustomOperation( mpi::Operation op, MPI_User_function* fct ) + { + WALBERLA_MPI_SECTION() + { + if (isMPIInitialized_ && !currentlyAborting_) + { + [[maybe_unused]] auto worked = std::get< 1 >(customMPIOperations_.try_emplace(op, fct)); + WALBERLA_ASSERT(worked, "Emplacement of operation " << typeid(op).name() << " of type " + << typeid(CType).name() << " did not work."); + } + else { WALBERLA_ABORT("MPI must be initialized before an new MPI_Op can be committed."); } + } + WALBERLA_NON_MPI_SECTION() + { + WALBERLA_ABORT( "This should not be called, if waLBerla is compiled without MPI." ); + } + } //@} //******************************************************************************************************************* @@ -163,6 +249,33 @@ private: // Singleton MPIManager() : comm_(MPI_COMM_NULL) { WALBERLA_NON_MPI_SECTION() { rank_ = 0; } } +/// It is possible to commit own datatypes to MPI, that are not part of the standard. One example would be float16. +/// With these maps, it is possible to track self defined MPI_Datatypes and MPI_Ops, to access them at any time and +/// place in the program, also, they are automatically freed once MPIManager::finalizeMPI is called. +/// To initialize types or operations and add them to the map, the getter functions 'commitCustomType' and +/// 'commitCustomOperation' should be used. This can for example be done e.g. in the specialization of the MPITrait of +/// the newly defined type. For an example see MPIWrapper.cpp + std::map< std::type_index, walberla::mpi::Datatype > customMPITypes_{}; + std::map< walberla::mpi::Operation, walberla::mpi::MPIOperation > customMPIOperations_{}; + // FIXME this must be type specific as well, but doing so is a bit more complicated. + // 1. Idea) Combining both maps together e.g. 
as std::map< typeid(CType), + // std::pair< MPI_DataType, + // std::map< Operation, + // MPI_Op > > > customMPITypesWithOps_{}; + // There the access is quite nasty to look at, but easily doable, the construction however is quite difficult + // also one needs to make sure that the type is initialized before the operation. + // 2. Idea) Leaving it as two maps customMPITypes_ and customMPIOperations, + // but storing a pair of typeid and operation as key for the operation map. + // This way everything would look nice, but someone needs to implement a comparison operator for this pair. + // I personally don't know where to put this comparison operator to, since it should not be part of the manager. + // 3. Idea) Since this relies on the use of MPITrait<CType> --> { MPI_Datatype, MPI_Op } someone could define a object + // to store in the MPIManager there, to keep the MPIManager light and easily understandable. + // I'm also not sure if the MPITrait is the right spot for this though. + // For more information about the changes done in the code to allow custom defined types and operations, + // check out MR !647 ( https://i10git.cs.fau.de/walberla/walberla/-/merge_requests/647 ) + + + }; // class MPIManager diff --git a/src/core/mpi/MPIOperation.h b/src/core/mpi/MPIOperation.h new file mode 100644 index 0000000000000000000000000000000000000000..3da98bfe8ea2bdb8783b02e997cdee486bb46a0d --- /dev/null +++ b/src/core/mpi/MPIOperation.h @@ -0,0 +1,64 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. 
+// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file MPIOperation.h +//! \ingroup core +//! \author Michael Zikeli <michael.zikeli@fau.de> +// +//====================================================================================================================== + +#pragma once + +#include "core/Abort.h" +#include "core/mpi/MPIWrapper.h" + +namespace walberla::mpi{ + +//******************************************************************************************************************* +/*! RAII class for MPI operators that commits and frees them +* +*/ +//******************************************************************************************************************* +class MPIOperation +{ + public: + MPIOperation() = delete; + + explicit MPIOperation( MPI_User_function* fct ) : mpiOperation_( MPI_OP_NULL ) + { + WALBERLA_MPI_SECTION() { + if ( MPI_Op_create( fct, true, &mpiOperation_) != MPI_SUCCESS ) + { + WALBERLA_ABORT("MPI_Op_create for " << typeid(mpiOperation_).name() << " failed." 
); + } + } // WALBERLA_MPI_SECTION + } + + ~MPIOperation() + { + WALBERLA_MPI_SECTION() { MPI_Op_free( & mpiOperation_ ); } + } + + operator MPI_Op() const + { + return mpiOperation_; + } + + protected: + MPI_Op mpiOperation_; +}; + + +} // namespace walberla::mpi \ No newline at end of file diff --git a/src/core/mpi/MPIWrapper.cpp b/src/core/mpi/MPIWrapper.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9d68668676c932de2c564dd37312ca3645104e45 --- /dev/null +++ b/src/core/mpi/MPIWrapper.cpp @@ -0,0 +1,142 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file MPIWrapper.cpp +//! \ingroup core +//! \author Michael Zikeli <michael.zikeli@fau.de> +// +//====================================================================================================================== + +#include "MPIWrapper.h" + +#include <set> + +#include "MPIManager.h" + +#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT +namespace walberla +{ + +namespace mpi +{ +namespace +{ +/// These functions can then be used by self defined mpi operations, e.g. by using CustomMPIOperation.
+using float16_t = walberla::float16; +// The signature of MPI_User_function looks like this +// typedef void MPI_User_function( void *invec, void *inoutvec, int *len, MPI_Datatype *datatype); + +void sum(void* mpiRHSArray, void* mpiLHSArray, int* len, MPI_Datatype*) +{ + // cast mpi type to target c++ type + auto* rhs = (float16_t*) mpiRHSArray; + auto* lhs = (float16_t*) mpiLHSArray; + for (int i = 0; i < *len; ++i) + { + *lhs += *rhs; + } +} + +void min(void* mpiRHSArray, void* mpiLHSArray, int* len, MPI_Datatype*) +{ + // cast mpi type to target c++ type + auto* rhs = (float16_t*) mpiRHSArray; + auto* lhs = (float16_t*) mpiLHSArray; + for (int i = 0; i < *len; ++i) + { + *lhs = (*rhs >= *lhs) ? *lhs : *rhs; + } +} + +void max(void* mpiRHSArray, void* mpiLHSArray, int* len, MPI_Datatype*) +{ + // cast mpi type to target c++ type + auto* rhs = (float16_t*) mpiRHSArray; + auto* lhs = (float16_t*) mpiLHSArray; + for (int i = 0; i < *len; ++i) + { + *lhs = (*rhs <= *lhs) ? *lhs : *rhs; + } +} + +MPI_User_function* returnMPIUserFctPointer(const Operation op) +{ + switch (op) + { + case SUM: + return ∑ + case MIN: + return &min; + case MAX: + return &max; + default: + WALBERLA_ABORT("The chosen operation " << typeid(op).name() << " is not implemented for float16 yet."); + } +} + +} +} + +/// Here some MPI_Datatypes and MPI_Ops are initialized that are not part of the MPI Standard and therefore have to be +/// define yourself. This is done in the MPIManager, since they need to be freed before MPIFinalize is called and this +/// way it is easiest to keep track of them. +/// For more information about this feature compare MR !647 ( +/// https://i10git.cs.fau.de/walberla/walberla/-/merge_requests/647 ) + +/*! + * \brief Specialization of MPITrait for float16 + * + * The initialization of the self defined MPI_Datatype and MPI_Op is done in the MPIManager so that it can be freed + * before MPI is finalized. 
+ */ +MPI_Datatype MPITrait< walberla::float16 >::type() +{ + +#ifdef WALBERLA_BUILD_WITH_MPI + // Since this type should be created only once, a static variable is used as safeguard. + static bool initializedType = false; + if (!initializedType) + { + // Since float16 consists of two Bytes, a continuous datatype with size of two byte is created. + mpi::MPIManager::instance()->commitCustomType< walberla::float16, const int >(2); + initializedType = true; + } + return mpi::MPIManager::instance()->getCustomType< walberla::float16 >(); +#else + return mpistubs::MPI_FLOAT16; +#endif +} + +MPI_Op MPITrait< walberla::float16 >::operation(const mpi::Operation& op) +{ + WALBERLA_MPI_SECTION() + { + // mpi::Operation is an enum and not an enum class, thus, it is not sufficient to make a just a bool variable as + // safeguard, since all operations are of type mpi::Operation and only the first one would pass the safeguard. + // Therefore, a set is created and each operation that is called the first time, will be initialized. 
+ static std::set< mpi::Operation > operationInitializationRegister; + const bool needsInitialization = std::get< 1 >(operationInitializationRegister.emplace(op)); + if (needsInitialization) + { + mpi::MPIManager::instance()->commitCustomOperation< walberla::float16 >( + op, mpi::returnMPIUserFctPointer(op)); + } + return MPIManager::instance()->getCustomOperation< walberla::float16 >(op); + } + WALBERLA_NON_MPI_SECTION() { WALBERLA_ABORT("If MPI is not used, a custom operator should never be called."); } +} + +} // namespace walberla +#endif // WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT diff --git a/src/core/mpi/MPIWrapper.h b/src/core/mpi/MPIWrapper.h index eecee3136c702a61e7987d25705076a7c780a635..baf0c62d8ba65bd7bd91a665d5d5261e8323d778 100644 --- a/src/core/mpi/MPIWrapper.h +++ b/src/core/mpi/MPIWrapper.h @@ -23,8 +23,7 @@ #pragma once #include "core/Abort.h" - - +#include "core/mpi/Operation.h" /// \cond internal @@ -47,6 +46,7 @@ #endif + namespace walberla { namespace mpistubs { //empty namespace which can be used @@ -77,8 +77,6 @@ namespace mpistubs { #pragma warning ( pop ) #endif - - #else // WALBERLA_BUILD_WITH_MPI @@ -143,6 +141,10 @@ const MPI_Datatype MPI_UNSIGNED_LONG_LONG = 10; const MPI_Datatype MPI_FLOAT = 11; const MPI_Datatype MPI_DOUBLE = 12; const MPI_Datatype MPI_LONG_DOUBLE = 13; +#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT + const MPI_Datatype MPI_FLOAT16 = 14; +#endif + const int MPI_ORDER_C = 0; const int MPI_ORDER_FORTRAN = 0; @@ -151,16 +153,17 @@ const MPI_Datatype MPI_ANY_SOURCE = -2; const MPI_Datatype MPI_ANY_TAG = -1; const MPI_Datatype MPI_DATATYPE_NULL = 0; -const MPI_Op MPI_MIN = 100; -const MPI_Op MPI_MAX = 101; -const MPI_Op MPI_SUM = 102; -const MPI_Op MPI_PROD = 103; -const MPI_Op MPI_LAND = 104; -const MPI_Op MPI_BAND = 105; -const MPI_Op MPI_LOR = 106; -const MPI_Op MPI_BOR = 107; -const MPI_Op MPI_LXOR = 108; -const MPI_Op MPI_BXOR = 109; +const MPI_Op MPI_OP_NULL = 99; +const MPI_Op MPI_MIN = 100; +const MPI_Op 
MPI_MAX = 101; +const MPI_Op MPI_SUM = 102; +const MPI_Op MPI_PROD = 103; +const MPI_Op MPI_LAND = 104; +const MPI_Op MPI_BAND = 105; +const MPI_Op MPI_LOR = 106; +const MPI_Op MPI_BOR = 107; +const MPI_Op MPI_LXOR = 108; +const MPI_Op MPI_BXOR = 109; const int MPI_PACKED = 1; const int MPI_UNDEFINED = -1; @@ -265,6 +268,7 @@ inline int MPI_Type_get_extent(MPI_Datatype, MPI_Aint*, MPI_Aint*) { WALBERLA_MP inline int MPI_Type_create_struct(int, const int[], const MPI_Aint[], const MPI_Datatype[], MPI_Datatype*) { WALBERLA_MPI_FUNCTION_ERROR } inline int MPI_Op_create(MPI_User_function*, int, MPI_Op*) { WALBERLA_MPI_FUNCTION_ERROR } +inline int MPI_Op_free(MPI_Op*) { WALBERLA_MPI_FUNCTION_ERROR } inline int MPI_Get_processor_name( char*, int* ) { WALBERLA_MPI_FUNCTION_ERROR } @@ -307,58 +311,104 @@ namespace mpi { + + /*!\class MPITrait // \brief Base template for the MPITrait class // // The MPITrait class template offers a translation between the C++ built-in data types and -// the corresponding MPI data types required for send and receive operations. For a particular -// MPITrait instantiation, the corresponding MPI data type can be obtained by calling type() -// of the MPITrait. The following example demonstrates the application of the MPITrait class: +// the corresponding MPI data types its respective operation required for send, receive and reduce operations. +// For a particular MPITrait instantiation, the corresponding MPI data type can be obtained by calling type() +// as well as calling operation( const Operation& ) to the MPI operation corresponding to the MPI data type. +// The following example demonstrates the application of the MPITrait class: - \code +\code // Initialization of the MPI communication int* pi; // Integer buffer for the MPI send operation - ... // Initialization of the send buffer +... 
// Initialization of the send buffer // Sending 50 integers to process 0 MPI_Send( pi, 50, MPITrait< int >::type(), 0, 0, MPI_COMM_WORLD ); - \endcode -*/ +\endcode + */ template< typename T > -struct MPITrait; - - - -/// Macro for the creation of MPITrait specializations for the supported data types. +struct MPITrait +{ + static inline MPI_Datatype type(); + static inline MPI_Op operation(const mpi::Operation& ); +}; -#define WALBERLA_CREATE_MPITRAIT_SPECIALIZATION(CPP_TYPE,MPI_TYPE) \ +/// Macro for specialization of the MPI supported data types in MPITrait::type(). +#define WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(CPP_TYPE, MPI_TYPE) \ template<> \ - struct MPITrait< CPP_TYPE > \ + inline MPI_Datatype MPITrait< CPP_TYPE >::type() \ { \ - static inline MPI_Datatype type() { return (MPI_TYPE); } \ + return (MPI_TYPE); \ } - - // MPITRAIT SPECIALIZATIONS -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( char , MPI_CHAR ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( signed char , MPI_CHAR ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( signed short int , MPI_SHORT ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( signed int , MPI_INT ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( signed long int , MPI_LONG ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( signed long long , MPI_LONG_LONG ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( unsigned char , MPI_UNSIGNED_CHAR ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( unsigned short int , MPI_UNSIGNED_SHORT ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( unsigned int , MPI_UNSIGNED ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( unsigned long int , MPI_UNSIGNED_LONG ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( unsigned long long , MPI_UNSIGNED_LONG_LONG ); +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(char, MPI_CHAR) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(signed char, MPI_CHAR) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(signed short int, MPI_SHORT) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(signed int, MPI_INT) 
+WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(signed long int, MPI_LONG) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(signed long long, MPI_LONG_LONG) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(unsigned char, MPI_UNSIGNED_CHAR) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(unsigned short int, MPI_UNSIGNED_SHORT) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(unsigned int, MPI_UNSIGNED) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(unsigned long int, MPI_UNSIGNED_LONG) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(unsigned long long, MPI_UNSIGNED_LONG_LONG) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(float, MPI_FLOAT) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(double, MPI_DOUBLE) +WALBERLA_CREATE_MPITRAIT_TYPE_SPECIALIZATION(long double, MPI_LONG_DOUBLE) #ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT - WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( walberla::float16 , MPI_WCHAR ); +template<> struct MPITrait< float16 >{ + static MPI_Datatype type(); + static MPI_Op operation(const mpi::Operation& ); +}; +#endif + + +/*! + * \brief Specialization of the static operation() method of MPITrait. + * + * It chooses a MPI_Op depending on the value type of the object the operation is performed on. + * + * \param op The operation to be performed (op is an element of the enum mpi::Operation). 
+ */ +template< typename T > +MPI_Op MPITrait< T >::operation(const mpi::Operation& op) +{ + switch (op) + { + case mpi::MIN: + return MPI_MIN; + case mpi::MAX: + return MPI_MAX; + case mpi::SUM: + return MPI_SUM; + case mpi::PRODUCT: + return MPI_PROD; + case mpi::LOGICAL_AND: + return MPI_LAND; + case mpi::BITWISE_AND: + return MPI_BAND; + case mpi::LOGICAL_OR: + return MPI_LOR; + case mpi::BITWISE_OR: + return MPI_BOR; + case mpi::LOGICAL_XOR: + return MPI_LXOR; + case mpi::BITWISE_XOR: + return MPI_BXOR; + default: + WALBERLA_ABORT("Unknown operation!"); + } +#ifdef __IBMCPP__ + return MPI_SUM; // never reached, helps to suppress a warning from the IBM compiler #endif -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( float , MPI_FLOAT ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( double , MPI_DOUBLE ); -WALBERLA_CREATE_MPITRAIT_SPECIALIZATION( long double , MPI_LONG_DOUBLE ); +} } // namespace walberla /// \endcond diff --git a/src/core/mpi/Operation.h b/src/core/mpi/Operation.h new file mode 100644 index 0000000000000000000000000000000000000000..f3097bb89fe4f3fcb3f4581d8435afe2b2b70ec8 --- /dev/null +++ b/src/core/mpi/Operation.h @@ -0,0 +1,27 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! 
\file Operation.h +//! \ingroup core +//! \author Michael Zikeli <michael.zikeli@fau.de> +// +//====================================================================================================================== +#pragma once + +namespace walberla::mpi +{ +// Note: I don't like at all that this is an enum and not an enum class, but changing this would be a major change in the framework. +enum Operation { MIN, MAX, SUM, PRODUCT, LOGICAL_AND, BITWISE_AND, LOGICAL_OR, BITWISE_OR, LOGICAL_XOR, BITWISE_XOR }; +} // namespace walberla::mpi \ No newline at end of file diff --git a/src/core/mpi/Reduce.h b/src/core/mpi/Reduce.h index 5e9bb8220112ff4bff9c19a02e66cb3c2d801d46..a0b6edb39cad16fdfa5c527f83f9fe007ba9fa2c 100644 --- a/src/core/mpi/Reduce.h +++ b/src/core/mpi/Reduce.h @@ -16,18 +16,19 @@ //! \file Reduce.h //! \ingroup core //! \author Christian Godenschwager <christian.godenschwager@fau.de> +//! \author Michael Zikeli <michael.zikeli@fau.de> // //====================================================================================================================== #pragma once -#include "BufferDataTypeExtensions.h" - #include "core/Abort.h" #include "core/DataTypes.h" #include "core/debug/Debug.h" -#include "core/mpi/MPIManager.h" #include "core/mpi/MPIWrapper.h" +#include "core/mpi/Operation.h" + +#include "BufferDataTypeExtensions.h" #include "core/math/Vector3.h" @@ -36,33 +37,10 @@ namespace walberla { -namespace mpi { - -enum Operation { MIN, MAX, SUM, PRODUCT, LOGICAL_AND, BITWISE_AND, LOGICAL_OR, BITWISE_OR, LOGICAL_XOR, BITWISE_XOR }; - -inline MPI_Op toMPI_Op( Operation operation ) +namespace mpi { - switch( operation ) - { - case MIN: return MPI_MIN; - case MAX: return MPI_MAX; - case SUM: return MPI_SUM; - case PRODUCT: return MPI_PROD; - case LOGICAL_AND: return MPI_LAND; - case BITWISE_AND: return MPI_BAND; - case LOGICAL_OR: return MPI_LOR; - case BITWISE_OR: return MPI_BOR; - case LOGICAL_XOR: return MPI_LXOR; - case BITWISE_XOR: return MPI_BXOR; 
- default: WALBERLA_ABORT( "Unknown operation!" ); - } -#ifdef __IBMCPP__ - return MPI_SUM; // never reached, helps to suppress a warning from the IBM compiler -#endif -} - //====================================================================================================================== /*! * \brief Reduces a value over all processes in-place @@ -91,11 +69,11 @@ void reduceInplace( T & value, Operation operation, int recvRank = 0, MPI_Comm c if( myRank == recvRank ) { - MPI_Reduce( MPI_IN_PLACE, &value, 1, MPITrait<T>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( MPI_IN_PLACE, &value, 1, MPITrait<T>::type(), MPITrait<T>::operation(operation), recvRank, comm ); } else { - MPI_Reduce( &value, nullptr, 1, MPITrait<T>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( &value, nullptr, 1, MPITrait<T>::type(), MPITrait<T>::operation(operation), recvRank, comm ); } } @@ -128,11 +106,11 @@ inline void reduceInplace( bool & value, Operation operation, int recvRank = 0, if( myRank == recvRank ) { - MPI_Reduce( MPI_IN_PLACE, &intValue, 1, MPITrait<int>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( MPI_IN_PLACE, &intValue, 1, MPITrait<int>::type(), MPITrait<int>::operation(operation), recvRank, comm ); } else { - MPI_Reduce( &intValue, nullptr, 1, MPITrait<int>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( &intValue, nullptr, 1, MPITrait<int>::type(), MPITrait<int>::operation(operation), recvRank, comm ); } value = intValue != 0; @@ -172,11 +150,11 @@ T reduce( const T value, Operation operation, int recvRank = 0, MPI_Comm comm = if( myRank == recvRank ) { - MPI_Reduce( const_cast<T*>( &value ), &result, 1, MPITrait<T>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( const_cast<T*>( &value ), &result, 1, MPITrait<T>::type(), MPITrait<T>::operation(operation), recvRank, comm ); } else { - MPI_Reduce( const_cast<T*>( &value ), nullptr, 1, MPITrait<T>::type(), toMPI_Op(operation), recvRank, comm ); + 
MPI_Reduce( const_cast<T*>( &value ), nullptr, 1, MPITrait<T>::type(), MPITrait<T>::operation(operation), recvRank, comm ); } return result; @@ -213,11 +191,11 @@ inline bool reduce( const bool value, Operation operation, int recvRank = 0, MPI if( myRank == recvRank ) { - MPI_Reduce( &intValue, &intResult, 1, MPITrait<int>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( &intValue, &intResult, 1, MPITrait<int>::type(), MPITrait<int>::operation(operation), recvRank, comm ); } else { - MPI_Reduce( &intValue, nullptr, 1, MPITrait<int>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( &intValue, nullptr, 1, MPITrait<int>::type(), MPITrait<int>::operation(operation), recvRank, comm ); } return intResult != 0; @@ -252,11 +230,11 @@ void reduceInplace( std::vector<T> & values, Operation operation, int recvRank = if( myRank == recvRank ) { - MPI_Reduce( MPI_IN_PLACE, values.empty() ? nullptr : &values[0], int_c( values.size() ), MPITrait<T>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( MPI_IN_PLACE, values.empty() ? nullptr : &values[0], int_c( values.size() ), MPITrait<T>::type(), MPITrait<T>::operation(operation), recvRank, comm ); } else { - MPI_Reduce( values.empty() ? nullptr : &values[0], nullptr, int_c( values.size() ), MPITrait<T>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( values.empty() ? nullptr : &values[0], nullptr, int_c( values.size() ), MPITrait<T>::type(), MPITrait<T>::operation(operation), recvRank, comm ); } } @@ -292,14 +270,14 @@ inline void reduceInplace( std::vector<bool> & values, Operation operation, int if( myRank == recvRank ) { - MPI_Reduce( MPI_IN_PLACE, sendBuffer.empty() ? nullptr : &sendBuffer[0], int_c( sendBuffer.size() ), MPITrait<uint8_t>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( MPI_IN_PLACE, sendBuffer.empty() ? 
nullptr : &sendBuffer[0], int_c( sendBuffer.size() ), MPITrait<uint8_t>::type(), MPITrait<uint8_t>::operation(operation), recvRank, comm ); size_t size = values.size(); convert( sendBuffer, values ); values.resize(size); } else { - MPI_Reduce( sendBuffer.empty() ? nullptr : &sendBuffer[0], nullptr, int_c( sendBuffer.size() ), MPITrait<uint8_t>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( sendBuffer.empty() ? nullptr : &sendBuffer[0], nullptr, int_c( sendBuffer.size() ), MPITrait<uint8_t>::type(), MPITrait<uint8_t>::operation(operation), recvRank, comm ); } } @@ -331,11 +309,11 @@ void reduceInplace( math::Vector3<T> & values, Operation operation, int recvRank if( myRank == recvRank ) { - MPI_Reduce( MPI_IN_PLACE, values.data(), 3, MPITrait<T>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( MPI_IN_PLACE, values.data(), 3, MPITrait<T>::type(), MPITrait<T>::operation(operation), recvRank, comm ); } else { - MPI_Reduce( values.data(), nullptr, 3, MPITrait<T>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( values.data(), nullptr, 3, MPITrait<T>::type(), MPITrait<T>::operation(operation), recvRank, comm ); } } @@ -367,11 +345,11 @@ inline void reduceInplace( math::Vector3<bool> & values, Operation operation, in if( myRank == recvRank ) { - MPI_Reduce( MPI_IN_PLACE, intValues.data(), 3, MPITrait<int>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( MPI_IN_PLACE, intValues.data(), 3, MPITrait<int>::type(), MPITrait<int>::operation(operation), recvRank, comm ); } else { - MPI_Reduce( intValues.data(), nullptr, 3, MPITrait<int>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( intValues.data(), nullptr, 3, MPITrait<int>::type(), MPITrait<int>::operation(operation), recvRank, comm ); } for(uint_t i = 0; i < 3; ++i) @@ -411,11 +389,11 @@ math::Vector3<T> reduce( const math::Vector3<T> & values, Operation operation, i if( myRank == recvRank ) { - MPI_Reduce( const_cast<T*>( values.data() ), result.data(), 3, 
MPITrait<T>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( const_cast<T*>( values.data() ), result.data(), 3, MPITrait<T>::type(), MPITrait<T>::operation(operation), recvRank, comm ); } else { - MPI_Reduce( const_cast<T*>( values.data() ), nullptr, 3, MPITrait<T>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( const_cast<T*>( values.data() ), nullptr, 3, MPITrait<T>::type(), MPITrait<T>::operation(operation), recvRank, comm ); } return result; @@ -452,14 +430,14 @@ inline math::Vector3<bool> reduce( const math::Vector3<bool> & values, Operation if( myRank == recvRank ) { - MPI_Reduce( MPI_IN_PLACE, intValues.data(), 3, MPITrait<int>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( MPI_IN_PLACE, intValues.data(), 3, MPITrait<int>::type(), MPITrait<int>::operation(operation), recvRank, comm ); for(uint_t i = 0; i < 3; ++i) results[i] = intValues[i] != 0; } else { - MPI_Reduce( intValues.data(), nullptr, 3, MPITrait<int>::type(), toMPI_Op(operation), recvRank, comm ); + MPI_Reduce( intValues.data(), nullptr, 3, MPITrait<int>::type(), MPITrait<int>::operation(operation), recvRank, comm ); } return results; @@ -487,7 +465,7 @@ T allReduce( const T & value, Operation operation, MPI_Comm comm = MPI_COMM_WORL WALBERLA_NON_MPI_SECTION() { return value; } T result; - MPI_Allreduce( const_cast<T*>( &value ), &result, 1, MPITrait<T>::type(), toMPI_Op(operation), comm ); + MPI_Allreduce( const_cast<T*>( &value ), &result, 1, MPITrait<T>::type(), MPITrait<T>::operation(operation), comm ); return result; } @@ -514,7 +492,7 @@ inline bool allReduce( const bool value, Operation operation, MPI_Comm comm = MP int intValue = value ? 
1 : 0; - MPI_Allreduce( MPI_IN_PLACE, &intValue, 1, MPITrait<int>::type(), toMPI_Op(operation), comm ); + MPI_Allreduce( MPI_IN_PLACE, &intValue, 1, MPITrait<int>::type(), MPITrait<int>::operation(operation), comm ); return intValue != 0; } @@ -539,7 +517,7 @@ void allReduceInplace( T & value, Operation operation, MPI_Comm comm = MPI_COMM_ WALBERLA_NON_MPI_SECTION() { return; } - MPI_Allreduce( MPI_IN_PLACE, &value, 1, MPITrait<T>::type(), toMPI_Op(operation), comm ); + MPI_Allreduce( MPI_IN_PLACE, &value, 1, MPITrait<T>::type(), MPITrait<T>::operation(operation), comm ); } @@ -562,7 +540,7 @@ inline void allReduceInplace( bool & value, Operation operation, MPI_Comm comm = int intValue = value ? 1 : 0; - MPI_Allreduce( MPI_IN_PLACE, &intValue, 1, MPITrait<int>::type(), toMPI_Op(operation), comm ); + MPI_Allreduce( MPI_IN_PLACE, &intValue, 1, MPITrait<int>::type(), MPITrait<int>::operation(operation), comm ); value = intValue != 0; } @@ -587,7 +565,7 @@ void allReduceInplace( std::vector<T> & values, Operation operation, MPI_Comm co WALBERLA_NON_MPI_SECTION() { return; } - MPI_Allreduce( MPI_IN_PLACE, values.empty() ? nullptr : &values[0], int_c( values.size() ), MPITrait<T>::type(), toMPI_Op(operation), comm ); + MPI_Allreduce( MPI_IN_PLACE, values.empty() ? nullptr : &values[0], int_c( values.size() ), MPITrait<T>::type(), MPITrait<T>::operation(operation), comm ); } @@ -612,7 +590,7 @@ inline void allReduceInplace( std::vector<bool> & bools, Operation operation, MP std::vector<uint8_t> sendBuffer; convert( bools, sendBuffer ); - MPI_Allreduce( MPI_IN_PLACE, sendBuffer.empty() ? nullptr : &sendBuffer[0], int_c( sendBuffer.size() ), MPITrait<uint8_t>::type(), toMPI_Op(operation), comm ); + MPI_Allreduce( MPI_IN_PLACE, sendBuffer.empty() ? 
nullptr : &sendBuffer[0], int_c( sendBuffer.size() ), MPITrait<uint8_t>::type(), MPITrait<uint8_t>::operation(operation), comm ); auto size = bools.size(); convert(sendBuffer, bools); bools.resize(size); @@ -637,7 +615,7 @@ void allReduceInplace( math::Vector3<T> & values, Operation operation, MPI_Comm WALBERLA_NON_MPI_SECTION() { return; } - MPI_Allreduce( MPI_IN_PLACE, values.data(), 3, MPITrait<T>::type(), toMPI_Op(operation), comm ); + MPI_Allreduce( MPI_IN_PLACE, values.data(), 3, MPITrait<T>::type(), MPITrait<T>::operation(operation), comm ); } @@ -663,7 +641,7 @@ inline void allReduceInplace( math::Vector3<bool> & bools, Operation operation, math::Vector3<int> intValues{bools[0] ? 1 : 0, bools[1] ? 1 : 0, bools[2] ? 1 : 0}; - MPI_Allreduce( MPI_IN_PLACE, intValues.data(), 3, MPITrait<int>::type(), toMPI_Op(operation), comm ); + MPI_Allreduce( MPI_IN_PLACE, intValues.data(), 3, MPITrait<int>::type(), MPITrait<int>::operation(operation), comm ); for(uint_t i = 0; i < 3; ++i) { diff --git a/src/core/timing/Timer.h b/src/core/timing/Timer.h index 9f7c3f97d1066ff7ffa322cb5d6550c9f5d5013b..efc7016409a6e91d8c83b96aebfbd9fc40c4a8cf 100644 --- a/src/core/timing/Timer.h +++ b/src/core/timing/Timer.h @@ -30,6 +30,7 @@ #include "WcPolicy.h" #include "core/DataTypes.h" +#include "core/mpi/MPIManager.h" #include "core/mpi/RecvBuffer.h" #include "core/mpi/Reduce.h" #include "core/mpi/SendBuffer.h" @@ -527,7 +528,7 @@ shared_ptr<Timer<TP> > getReduced( Timer<TP>& timer, ReduceType rt, int targetRa } //uint_t counter, double min, double max, double total, double sumOfSquares - if ( targetRank < 0 || targetRank == MPIManager::instance()->worldRank() ) + if ( targetRank < 0 || targetRank == mpi::MPIManager::instance()->worldRank() ) return make_shared<Timer<TP> >( mpi::MPIManager::instance()->numProcesses(), min, max, total, sumOfSquares ); return nullptr; diff --git a/src/field/distributors/KernelDistributor.h b/src/field/distributors/KernelDistributor.h index 
712d9a7b7e32ae96b0d259873d102166675b22b3..bc0abb8fa5882e51960d936132cd3613d55f5172 100644 --- a/src/field/distributors/KernelDistributor.h +++ b/src/field/distributors/KernelDistributor.h @@ -62,7 +62,7 @@ public: WALBERLA_ASSERT(baseField.nrOfGhostLayers() > uint_t(0), "field for kernel distribution needs at least one ghost layer"); } - inline bool operator==( const OwnType & other ){ return baseField_ == other.baseField_; } + inline bool operator==( const OwnType & other ) const { return baseField_ == other.baseField_; } template< typename ForwardIterator_T > inline void distribute( const Vector3<real_t> & position, ForwardIterator_T distributeValueBegin ) diff --git a/src/field/distributors/NearestNeighborDistributor.h b/src/field/distributors/NearestNeighborDistributor.h index 932f443b3e30af9ab6a2bc4f6c6f3f585e485473..c4819cb9119d46e46dbc34bed40228664bb5cffb 100644 --- a/src/field/distributors/NearestNeighborDistributor.h +++ b/src/field/distributors/NearestNeighborDistributor.h @@ -59,7 +59,7 @@ public: : blockStorage_( blockStorage ), block_( block ), baseField_( baseField ), flagField_( flagField ), evaluationMask_( evaluationMask ) {} - inline bool operator==( const OwnType & other ){ return baseField_ == other.baseField_; } + inline bool operator==( const OwnType & other ) const { return baseField_ == other.baseField_; } template< typename ForwardIterator_T > inline void distribute( const Vector3<real_t> & position, ForwardIterator_T distributeValueBegin ) diff --git a/src/field/interpolators/KernelFieldInterpolator.h b/src/field/interpolators/KernelFieldInterpolator.h index 0e59fabf21dfd4899e62ca29961811a0e99e59d9..0f5987e76e844df9505869fbaab3feadacbc88fa 100644 --- a/src/field/interpolators/KernelFieldInterpolator.h +++ b/src/field/interpolators/KernelFieldInterpolator.h @@ -105,7 +105,7 @@ public: } - inline bool operator==( const OwnType & other ){ return baseField_ == other.baseField_; } + inline bool operator==( const OwnType & other ) const { return 
baseField_ == other.baseField_; } template< typename ForwardIterator_T > inline void get( const Vector3<real_t> & position, ForwardIterator_T interpolationResultBegin ) diff --git a/src/field/interpolators/NearestNeighborFieldInterpolator.h b/src/field/interpolators/NearestNeighborFieldInterpolator.h index b5b5cba7f65a3e06517356933d157108ba81e90c..bb08276f9f12949a10461e96b1a815acd64f2559 100644 --- a/src/field/interpolators/NearestNeighborFieldInterpolator.h +++ b/src/field/interpolators/NearestNeighborFieldInterpolator.h @@ -57,7 +57,7 @@ public: {} - inline bool operator==( const OwnType & other ){ return baseField_ == other.baseField_; } + inline bool operator==( const OwnType & other ) const { return baseField_ == other.baseField_; } template< typename ForwardIterator_T > inline void get( const Vector3<real_t> & position, ForwardIterator_T interpolationResultBegin ) diff --git a/src/field/interpolators/TrilinearFieldInterpolator.h b/src/field/interpolators/TrilinearFieldInterpolator.h index e9809d835f1bf67e89da8a0eb2b5a9493624c84e..351ed7afea09db1c75feeb59dd771afb0796949e 100644 --- a/src/field/interpolators/TrilinearFieldInterpolator.h +++ b/src/field/interpolators/TrilinearFieldInterpolator.h @@ -62,7 +62,7 @@ public: } - inline bool operator==( const OwnType & other ){ return baseField_ == other.baseField_; } + inline bool operator==( const OwnType & other ) const { return baseField_ == other.baseField_; } template< typename ForwardIterator_T > inline void get( const Vector3<real_t> & position, ForwardIterator_T interpolationResultBegin ) diff --git a/tests/core/CMakeLists.txt b/tests/core/CMakeLists.txt index 240fe56dc9195983d88f4930e7e4bcb049f009e5..604de6371b573b350638ea4fb4f003d93960cb7c 100644 --- a/tests/core/CMakeLists.txt +++ b/tests/core/CMakeLists.txt @@ -119,6 +119,11 @@ waLBerla_execute_test( NAME SetReductionTest4 COMMAND $<TARGET_FILE:SetReductio waLBerla_execute_test( NAME SetReductionTest5 COMMAND $<TARGET_FILE:SetReductionTest> PROCESSES 5 ) 
waLBerla_execute_test( NAME SetReductionTest27 COMMAND $<TARGET_FILE:SetReductionTest> PROCESSES 27 ) +if ( WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT ) + waLBerla_compile_test( NAME MPIFloat16Test FILES mpi/MPIFloat16Test.cpp DEPENDS core ) + target_compile_features( MPIFloat16Test PUBLIC cxx_std_23 ) + waLBerla_execute_test( NAME MPIFloat16Test4 COMMAND $<TARGET_FILE:MPIFloat16Test> PROCESSES 4) +endif () ############## diff --git a/tests/core/FP16Test.cpp b/tests/core/FP16Test.cpp index 60a2be0eeee0872449f6a648fa1c65abbbda7f42..e08dd55b099c8edc36f97ab4f86a613d6671d1ac 100644 --- a/tests/core/FP16Test.cpp +++ b/tests/core/FP16Test.cpp @@ -70,7 +70,7 @@ void fp16Test( int argc, char ** argv ) const float16 y = -1.8f16; const float64 z = -0.6; WALBERLA_LOG_INFO_ON_ROOT(" + " << (double) x << " + " << (double) y << " == " << (float64) (x + y) << " ? ") - WALBERLA_CHECK_FLOAT_EQUAL((float64) (x + y), z, "float16 addition does not work correctly."); + WALBERLA_CHECK_FLOAT_EQUAL( (x + y), (float16) z, "float16 addition does not work correctly."); #endif } diff --git a/tests/core/Float16SupportTest.cpp b/tests/core/Float16SupportTest.cpp index 04ea9378f54eee4ee78f81177fc609d732da21c5..5116886ff4e154cecb21465acc39e40caea776dc 100644 --- a/tests/core/Float16SupportTest.cpp +++ b/tests/core/Float16SupportTest.cpp @@ -19,14 +19,16 @@ // //====================================================================================================================== -#include <memory> -#include <numeric> - #include "core/DataTypes.h" #include "core/Environment.h" #include "core/logging/Logging.h" +#include <numeric> + namespace walberla::simple_Float16_test { + + +#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT using walberla::floatIsEqual; using walberla::real_t; using walberla::uint_c; @@ -90,12 +92,12 @@ void vector_test() fpDst_cast.assign( 10, (dst_t) 1.5 ); fp32.assign( 10, 1.5f ); std::copy( fpSrc.begin(), fpSrc.end(), fpDst.begin() ); - WALBERLA_LOG_WARNING_ON_ROOT( +
WALBERLA_LOG_INFO_ON_ROOT( " std::vector.assign is not able to assign " << typeid( src_t ).name() << " values to container of type " << precisionType << ".\n" << " Therefore, the floating point value for assign must be cast beforehand or std::copy must be used, since it uses a static_cast internally." ); - fpSrc[5] = 2.3; + fpSrc[5] = real_c(2.3); fpDst_cast[5] = (dst_t) 2.3; fp32[5] = 2.3f; fpDst[5] = (dst_t) 2.3; @@ -118,7 +120,7 @@ void vector_test() WALBERLA_CHECK_FLOAT_EQUAL( (dst_t)sumSrc, sumDst ); } { - fpSrc.assign( 13, 1.3 ); + fpSrc.assign(13, real_c(1.3)); std::copy( fpSrc.begin(), fpSrc.end(), fpDst.begin() ); const auto sumSrc = std::reduce(fpSrc.begin(), fpSrc.end()); const auto sumDst = std::reduce(fpDst.begin(), fpDst.end()); @@ -126,8 +128,11 @@ void vector_test() } } // simple_Float16_test::vector_test() +#endif // WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT + int main( int argc, char** argv ) { +#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT // This check only works since C++23 and is used in many implementations, so it's important, that it works. WALBERLA_CHECK( std::is_arithmetic< dst_t >::value ); @@ -149,15 +154,17 @@ int main( int argc, char** argv ) WALBERLA_LOG_INFO_ON_ROOT( " Start a where float32 is sufficient but float16 is not." ); WALBERLA_CHECK_FLOAT_UNEQUAL( dst_t(1.0)-dst_t(0.3), 1.0-0.3 ); WALBERLA_CHECK_FLOAT_EQUAL( 1.0f-0.3f, 1.0-0.3 ); +#else + WALBERLA_LOG_WARNING_ON_ROOT( "\nWALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT is not enabled. 
So this test cannot be run!\n" ) +#endif // WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT - return 0; + return EXIT_SUCCESS; } // simple_Float16_test::main() } // namespace walberla::simple_Float16_test int main( int argc, char** argv ) { - walberla::simple_Float16_test::main( argc, argv ); + return walberla::simple_Float16_test::main( argc, argv ); - return EXIT_SUCCESS; } // main() diff --git a/tests/core/mpi/MPIFloat16Test.cpp b/tests/core/mpi/MPIFloat16Test.cpp new file mode 100644 index 0000000000000000000000000000000000000000..be0bc5aa4f86eacc22f223c165f1dd943d5dbd56 --- /dev/null +++ b/tests/core/mpi/MPIFloat16Test.cpp @@ -0,0 +1,162 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file MPIFloat16Test.cpp +//! \ingroup core +//! \author Michael Zikeli <michael.zikeli@fau.de> +//! \brief This test is to check whether the self defined MPI_Datatype and the self defined Operators are working. +//! To verify the type, some parts of the BufferSystemTest are just copied. +//! To verify the operations, a simple AllReduce is executed for each operation. +//! For now only { SUM, MIN, MAX } are implemented, thus only those are tested.
+// +//====================================================================================================================== + +#include "core/Abort.h" +#include "core/debug/TestSubsystem.h" +#include "core/logging/Logging.h" +#include "core/mpi/BufferSystem.h" +#include "core/mpi/Environment.h" +#include "core/mpi/Reduce.h" + + +namespace walberla::mpifloat16test +{ + +using mpi::BufferSystem; +using namespace std::literals::chrono_literals; + +#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT +void symmetricCommunication() +{ + const int MSG_SIZE = 10; + + auto mpiManager = MPIManager::instance(); + + const int numProcesses = mpiManager->numProcesses(); + const int rank = mpiManager->worldRank(); + const int leftNeighbor = (rank-1+numProcesses) % numProcesses; + const int rightNeighbor = (rank+1) % numProcesses; + + WALBERLA_CHECK_GREATER_EQUAL( numProcesses, 3 ); + + + BufferSystem bs ( MPI_COMM_WORLD ); + + // Pack Message to left neighbor containing own rank + for( int i=0; i< MSG_SIZE; ++i ) + bs.sendBuffer( leftNeighbor ) << numeric_cast<float16>(rank) + float16(0.3); + + // Pack Message to right neighbor containing own rank + for( int i=0; i< MSG_SIZE; ++i ) + bs.sendBuffer( rightNeighbor ) << numeric_cast<float16>(rank) - float16(0.3); + + bs.setReceiverInfoFromSendBufferState( true, false ); + bs.sendAll(); + + for( auto it = bs.begin(); it != bs.end(); ++it ) + { + WALBERLA_CHECK ( it.rank() == leftNeighbor || it.rank() == rightNeighbor ); + WALBERLA_CHECK_EQUAL( it.buffer().size(), MSG_SIZE * sizeof(float16) + MSG_SIZE * mpi::BUFFER_DEBUG_OVERHEAD ); + + auto receivedVal = float16(-1); + it.buffer() >> receivedVal; + + WALBERLA_CHECK_EQUAL( typeid(receivedVal), typeid(float16) ); + + if ( it.rank() == leftNeighbor ) + { + WALBERLA_CHECK_FLOAT_EQUAL( receivedVal, numeric_cast<float16>( it.rank() ) - float16(0.3) ); + WALBERLA_CHECK_FLOAT_UNEQUAL( receivedVal, numeric_cast<float16>( it.rank() ) + float16(0.3), 0.5); + } else { + 
WALBERLA_CHECK_FLOAT_EQUAL( receivedVal, numeric_cast<float16>( it.rank() ) + float16(0.3) ); + WALBERLA_CHECK_FLOAT_UNEQUAL( receivedVal, numeric_cast<float16>( it.rank() ) - float16(0.3), 0.5); + } + } + + WALBERLA_CHECK_EQUAL( bs.getBytesSent(), (MSG_SIZE * sizeof(float16) + MSG_SIZE * mpi::BUFFER_DEBUG_OVERHEAD) * 2 ); + WALBERLA_CHECK_EQUAL( bs.getBytesReceived(), (MSG_SIZE * sizeof(float16) + MSG_SIZE * mpi::BUFFER_DEBUG_OVERHEAD) * 2 ); +} + +void reduce( ) +{ + + auto mpiManager = MPIManager::instance(); + + const int numProcesses = mpiManager->numProcesses(); + const int rank = mpiManager->worldRank(); + + const auto startValue = numeric_cast<float16>(rank) + float16(0.3); + + // SUM + auto value = startValue; + + walberla::mpi::allReduceInplace( value, walberla::mpi::SUM ); + + auto sum = float16( 0.0 ); + for( int i = 0; i < numProcesses; ++i ) + sum += numeric_cast<float16>(i) + float16(0.3); + WALBERLA_CHECK_FLOAT_EQUAL( value, sum ); + WALBERLA_CHECK_FLOAT_UNEQUAL( value, ((numProcesses*(numProcesses-1)/2.)+0.3*numProcesses), 1e-10 ); + + // MIN + value = startValue; + + walberla::mpi::allReduceInplace( value, walberla::mpi::MIN ); + WALBERLA_CHECK_FLOAT_EQUAL( value, numeric_cast<float16>(0.3) ); + + // MAX + value = startValue; + + walberla::mpi::allReduceInplace( value, walberla::mpi::MAX ); + WALBERLA_CHECK_FLOAT_EQUAL( value, numeric_cast<float16>(numProcesses-1)+numeric_cast<float16>(0.3) ); + +} +#endif // WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT + +int main( int argc, char** argv ) +{ +#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT + mpi::Environment mpiEnv( argc, argv ); + debug::enterTestMode(); + walberla::logging::Logging::instance()->setLogLevel( walberla::logging::Logging::INFO ); + + auto mpiManager = MPIManager::instance(); + auto numProcesses = mpiManager->numProcesses(); + + if(numProcesses <= 2) + { + WALBERLA_ABORT("This test has to be executed on at least 3 processes. 
Executed on " << numProcesses); + return EXIT_FAILURE; + } + + WALBERLA_LOG_INFO_ON_ROOT("Testing Symmetric Communication..."); + symmetricCommunication(); + + WALBERLA_LOG_INFO_ON_ROOT("Testing Reduce operations..."); + reduce( ); +#else + WALBERLA_LOG_WARNING_ON_ROOT( "\nWALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT is not enabled. So this test cannot be run!\n" ) +#endif // WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT + + return EXIT_SUCCESS; + +} // mpifloat16test::main() + +} // namespace walberla::mpifloat16test + +int main( int argc, char** argv ) +{ + return walberla::mpifloat16test::main( argc, argv ); +} // main()