diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 4a44474058c2ce51ba80bdd8651fc7d3ba8624a5..ae96208da8541f44fc3d5cd468a07694cf2f003d 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,6 +1,6 @@
 ###############################################################################
 ##                                                                           ##
-##    Genral settings                                                        ##
+##    General settings                                                       ##
 ##                                                                           ##
 ###############################################################################
 
@@ -970,6 +970,71 @@ msvc-14.1_MpiOnly:
       - triggers
 
 
+###############################################################################
+##                                                                           ##
+##    macOS Builds                                                           ##
+##                                                                           ##
+###############################################################################
+
+
+.mac_build_template: &mac_build_definition
+   script:
+      - export NUM_CORES=$(system_profiler SPHardwareDataType | grep 'Total Number of Cores' | awk '{print $5}')
+      - export MAX_BUILD_CORES=$(( $(system_profiler SPHardwareDataType | grep 'Memory' | awk '{print $2}') / 4 ))
+      - "[[ $MAX_BUILD_CORES -lt $NUM_CORES ]] && export NUM_BUILD_CORES=$MAX_BUILD_CORES || export NUM_BUILD_CORES=$NUM_CORES"
+      - c++ --version
+      - cmake --version
+      - mpirun --version
+      - mkdir build
+      - cd build
+      - cmake .. -DWALBERLA_BUILD_TESTS=ON -DWALBERLA_BUILD_BENCHMARKS=ON -DWALBERLA_BUILD_TUTORIALS=ON -DWALBERLA_BUILD_TOOLS=ON -DWALBERLA_BUILD_WITH_MPI=$WALBERLA_BUILD_WITH_MPI -DWALBERLA_BUILD_WITH_PYTHON=$WALBERLA_BUILD_WITH_PYTHON -DWALBERLA_BUILD_WITH_OPENMP=$WALBERLA_BUILD_WITH_OPENMP -DWALBERLA_BUILD_WITH_CUDA=$WALBERLA_BUILD_WITH_CUDA -DCMAKE_BUILD_TYPE=$CMAKE_BUILD_TYPE -DWARNING_ERROR=ON
+      - cmake . -LAH
+      - make -j $NUM_BUILD_CORES -l $NUM_CORES
+      - ctest -LE "$CTEST_EXCLUDE_LABELS|cuda" -C $CMAKE_BUILD_TYPE --output-on-failure -j $NUM_CORES
+   tags:
+      - mac
+
+mac_Serial_Dbg:
+   <<: *mac_build_definition
+   variables:
+      CMAKE_BUILD_TYPE: "DebugOptimized"
+      CTEST_EXCLUDE_LABELS: "longrun"
+      WALBERLA_BUILD_WITH_MPI: "OFF"
+      WALBERLA_BUILD_WITH_OPENMP: "OFF"
+      WALBERLA_BUILD_WITH_PYTHON: "ON"
+      WALBERLA_BUILD_WITH_CUDA: "ON"
+
+mac_Serial:
+   <<: *mac_build_definition
+   variables:
+      CMAKE_BUILD_TYPE: "Release"
+      CTEST_EXCLUDE_LABELS: "longrun"
+      WALBERLA_BUILD_WITH_MPI: "OFF"
+      WALBERLA_BUILD_WITH_OPENMP: "OFF"
+      WALBERLA_BUILD_WITH_PYTHON: "ON"
+      WALBERLA_BUILD_WITH_CUDA: "ON"
+
+mac_MpiOnly_Dbg:
+   <<: *mac_build_definition
+   variables:
+      CMAKE_BUILD_TYPE: "DebugOptimized"
+      CTEST_EXCLUDE_LABELS: "longrun"
+      WALBERLA_BUILD_WITH_MPI: "ON"
+      WALBERLA_BUILD_WITH_OPENMP: "OFF"
+      WALBERLA_BUILD_WITH_PYTHON: "ON"
+      WALBERLA_BUILD_WITH_CUDA: "ON"
+
+mac_MpiOnly:
+   <<: *mac_build_definition
+   variables:
+      CMAKE_BUILD_TYPE: "Release"
+      CTEST_EXCLUDE_LABELS: "longrun"
+      WALBERLA_BUILD_WITH_MPI: "ON"
+      WALBERLA_BUILD_WITH_OPENMP: "OFF"
+      WALBERLA_BUILD_WITH_PYTHON: "ON"
+      WALBERLA_BUILD_WITH_CUDA: "ON"
+
+
 ###############################################################################
 ##                                                                           ##
 ##    Deploy jobs                                                            ##
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6ad1c28c18ca6f2b44b4f436bc654ab63d25b2c8..85125dcaf83ccf81a7ef108eb38f92b4b3ee89ed 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -360,17 +360,6 @@ if( NOT WARNING_DEPRECATED)
    endif()
 endif()
 
-# Treat warnings as errors
-if ( WARNING_ERROR )
-   if( WALBERLA_CXX_COMPILER_IS_GNU OR WALBERLA_CXX_COMPILER_IS_INTEL OR WALBERLA_CXX_COMPILER_IS_CLANG )
-      add_flag ( CMAKE_CXX_FLAGS "-pedantic-errors -Werror" )
-   elseif( WALBERLA_CXX_COMPILER_IS_MSVC )
-      add_flag ( CMAKE_CXX_FLAGS "/WX" )
-   elseif ( WALBERLA_CXX_COMPILER_IS_CRAY )
-      add_flag ( CMAKE_CXX_FLAGS "-h error_on_warning" )
-   endif()
-endif ( )
-
 
 if ( WALBERLA_CXX_COMPILER_IS_CLANG )
     add_flag ( CMAKE_CXX_FLAGS "-Wall -Wconversion -Wshadow -Wno-c++11-extensions -Qunused-arguments" )
@@ -784,18 +773,16 @@ if ( WALBERLA_BUILD_WITH_MPI AND NOT WALBERLA_CXX_COMPILER_IS_MPI_WRAPPER )
       if ( WIN32 )
          message ( STATUS "Enter Workaround Routine for Windows and OpenMPI: PRESS CONFIGURE ONE MORE TIME!" )
          string ( REGEX REPLACE "(.*)/bin/.*" "\\1" MPI_PATH ${MPI_CXX_COMPILER} )
-         find_path ( MPI_INCLUDE_PATH mpi.h
+         find_path ( MPI_C_INCLUDE_PATH mpi.h
             HINTS ${MPI_PATH}
             PATH_SUFFIXES include Inc)
-         set ( MPI_CXX_INCLUDE_PATH ${MPI_INCLUDE_PATH} CACHE FILEPATH "" FORCE )
-         set ( MPI_C_INCLUDE_PATH   ${MPI_INCLUDE_PATH} CACHE FILEPATH "" FORCE )
+         set ( MPI_CXX_INCLUDE_PATH ${MPI_C_INCLUDE_PATH} CACHE FILEPATH "" FORCE )
 
          set ( MPI_CXX_LIBRARIES "MPI_CXX_LIBRARIES-NOTFOUND" CACHE FILEPATH "Cleared" FORCE )
          find_library ( MPI_CXX_LIBRARIES
             NAMES         mpi++ mpicxx cxx mpi_cxx libmpi++ libmpicxx libcxx libmpi_cxx
             HINTS         ${MPI_PATH}
             PATH_SUFFIXES lib )
-         set ( MPI_LIBRARY "MPI_CXX_LIBRARIES" CACHE FILEPATH "" FORCE )
 
          if ( NOT MPI_CXX_LIBRARIES STREQUAL "MPI_CXX_LIBRARIES-NOTFOUND" )
             set ( MPI_CXX_FOUND ON FORCE )
@@ -806,7 +793,6 @@ if ( WALBERLA_BUILD_WITH_MPI AND NOT WALBERLA_CXX_COMPILER_IS_MPI_WRAPPER )
            NAMES         mpi mpich mpich2 msmpi libmpi libmpich libmpich2 libmsmpi
            HINTS         ${MPI_PATH}
            PATH_SUFFIXES lib )
-         set ( MPI_EXTRA_LIBRARY "MPI_C_LIBRARIES" CACHE FILEPATH "" FORCE )
 
          if ( NOT MPI_C_LIBRARIES STREQUAL "MPI_C_LIBRARIES-NOTFOUND" )
             set ( MPI_C_FOUND ON FORCE )
@@ -823,23 +809,14 @@ if ( WALBERLA_BUILD_WITH_MPI AND NOT WALBERLA_CXX_COMPILER_IS_MPI_WRAPPER )
    endif ( )
 
    if ( MPI_FOUND )
-       if ( MPI_CXX_FOUND )
-         include_directories ( SYSTEM ${MPI_CXX_INCLUDE_PATH} ${MPI_C_INCLUDE_PATH} )
-         foreach( LIB ${MPI_C_LIBRARIES} ${MPI_CXX_LIBRARIES} )
-           if ( LIB )
-              list ( APPEND SERVICE_LIBS ${LIB} )
-           endif ( )
-         endforeach ( )
-         add_flag ( CMAKE_CXX_FLAGS "${MPI_CXX_COMPILE_FLAGS}" )
-         add_flag ( CMAKE_C_FLAGS   "${MPI_C_COMPILE_FLAGS}" )
-      else ( ) # For older CMake versions
-         include_directories ( SYSTEM ${MPI_INCLUDE_PATH} )
-         list ( APPEND SERVICE_LIBS ${MPI_LIBRARY} )
-         if ( MPI_EXTRA_LIBRARY )
-            list ( APPEND SERVICE_LIBS ${MPI_EXTRA_LIBRARY} )
+     include_directories ( SYSTEM ${MPI_CXX_INCLUDE_PATH} ${MPI_C_INCLUDE_PATH} )
+     foreach( LIB ${MPI_C_LIBRARIES} ${MPI_CXX_LIBRARIES} )
+         if ( LIB )
+            list ( APPEND SERVICE_LIBS ${LIB} )
          endif ( )
-         add_flag ( CMAKE_C_FLAGS "${MPI_COMPILE_FLAGS}" )
-      endif ( )
+     endforeach ( )
+     add_flag ( CMAKE_CXX_FLAGS "${MPI_CXX_COMPILE_FLAGS}" )
+     add_flag ( CMAKE_C_FLAGS   "${MPI_C_COMPILE_FLAGS}" )
 
      add_flag ( CMAKE_MODULE_LINKER_FLAGS "${MPI_CXX_LINK_FLAGS}" )
      add_flag ( CMAKE_EXE_LINKER_FLAGS    "${MPI_CXX_LINK_FLAGS}" )
@@ -847,7 +824,7 @@ if ( WALBERLA_BUILD_WITH_MPI AND NOT WALBERLA_CXX_COMPILER_IS_MPI_WRAPPER )
 
      # When using Intel MPI, mpi.h has to be included before including the standard library
      # therefore we use the -include flag to enforce this.
-     if ( MPI_INCLUDE_PATH MATCHES "intel" )
+     if ( MPI_C_INCLUDE_PATH MATCHES "intel" )
          message (STATUS "Activating IntelMPI include workaround for mpi.h" )
          add_flag ( CMAKE_CXX_FLAGS "-include mpi.h" )
          add_flag ( CMAKE_C_FLAGS   "-include mpi.h" )
@@ -1233,7 +1210,23 @@ if ( WALBERLA_BUILD_WITH_LTO  )
    endif( )
 
 endif ( )
+
 ############################################################################################################################
+##
+##  Some more compiler flags that need to happen after any try_compile (e.g. inside FindMPI)
+##
+############################################################################################################################
+
+# Treat warnings as errors
+if ( WARNING_ERROR )
+   if( WALBERLA_CXX_COMPILER_IS_GNU OR WALBERLA_CXX_COMPILER_IS_INTEL OR WALBERLA_CXX_COMPILER_IS_CLANG )
+      add_flag ( CMAKE_CXX_FLAGS "-pedantic-errors -Werror" )
+   elseif( WALBERLA_CXX_COMPILER_IS_MSVC )
+      add_flag ( CMAKE_CXX_FLAGS "/WX" )
+   elseif ( WALBERLA_CXX_COMPILER_IS_CRAY )
+      add_flag ( CMAKE_CXX_FLAGS "-h error_on_warning" )
+   endif()
+endif ( )
 
 ############################################################################################################################
 ##
diff --git a/cmake/waLBerlaHelperFunctions.cmake b/cmake/waLBerlaHelperFunctions.cmake
index face2f95f2fdb187cefc274fe314bfc20875b5d0..4d1dafe0e5ce02f35d55151550750fcf3dcdcef2 100644
--- a/cmake/waLBerlaHelperFunctions.cmake
+++ b/cmake/waLBerlaHelperFunctions.cmake
@@ -156,6 +156,11 @@ function ( waLBerla_export )
     
     # Export link paths
     set ( WALBERLA_LINK_DIRS ${LINK_DIRS} CACHE INTERNAL "waLBerla link directories" )
+
+    set( WALBERLA_CXX_STANDARD ${CMAKE_CXX_STANDARD} CACHE INTERNAL "CXX standard")
+    set( WALBERLA_CXX_STANDARD_REQUIRED ${CMAKE_CXX_STANDARD_REQUIRED} CACHE INTERNAL "CXX Standard Required")
+    set( WALBERLA_CXX_EXTENSIONS ${CMAKE_CXX_EXTENSIONS} CACHE INTERNAL "CXX Extensions")
+
 endfunction( waLBerla_export)
 
 #######################################################################################################################
@@ -210,7 +215,11 @@ function ( waLBerla_import )
     endif()
     
     set( SERVICE_LIBS ${WALBERLA_SERVICE_LIBS} PARENT_SCOPE )
-    
+
+    set( CMAKE_CXX_STANDARD ${WALBERLA_CXX_STANDARD}  PARENT_SCOPE)
+    set( CMAKE_CXX_STANDARD_REQUIRED ${WALBERLA_STANDARD_REQUIRED} PARENT_SCOPE)
+    set( CMAKE_CXX_EXTENSIONS ${WALBERLA_EXTENSIONS} PARENT_SCOPE)
+
     link_directories( ${WALBERLA_LINK_DIRS} )
 endfunction( waLBerla_import)
 #######################################################################################################################
diff --git a/src/blockforest/loadbalancing/DynamicParMetis.cpp b/src/blockforest/loadbalancing/DynamicParMetis.cpp
index a612d47759d6e5425c9b699ef290cd1237387c7b..0ec01197de2e98535cf4e0dc225ef3453518e718 100644
--- a/src/blockforest/loadbalancing/DynamicParMetis.cpp
+++ b/src/blockforest/loadbalancing/DynamicParMetis.cpp
@@ -30,20 +30,28 @@
 #include "core/mpi/MPIManager.h"
 #include "core/mpi/Gather.h"
 #include "core/mpi/Gatherv.h"
+#include "core/mpi/Reduce.h"
 
 #include "core/timing/Timer.h"
 
 #include <boost/algorithm/string/case_conv.hpp>
 #include <boost/algorithm/string/trim.hpp>
 
+#include <array>
+#include <vector>
+
 namespace walberla {
 namespace blockforest {
 
-std::pair<uint_t, uint_t> getBlockSequenceRange( const PhantomBlockForest & phantomForest, MPI_Comm comm )
+std::pair<uint_t, uint_t> getBlockSequenceRange( uint_t numLocalBlocks, MPI_Comm comm )
 {
    const uint_t rank = uint_c(mpi::translateRank(mpi::MPIManager::instance()->comm(), comm, MPIManager::instance()->rank()));
-
-   uint_t numLocalBlocks = phantomForest.getNumberOfBlocks();
+   WALBERLA_DEBUG_SECTION()
+   {
+      int rankRaw;
+      MPI_Comm_rank(comm, &rankRaw);
+      WALBERLA_ASSERT_EQUAL(rank, rankRaw);
+   }
 
    size_t sequenceStartOnProcess = 0;
    MPI_Exscan( &numLocalBlocks, &sequenceStartOnProcess, 1, MPITrait<uint_t>::type(), MPI_SUM, comm );
@@ -53,14 +61,13 @@ std::pair<uint_t, uint_t> getBlockSequenceRange( const PhantomBlockForest & phan
    return std::make_pair( sequenceStartOnProcess, sequenceStartOnProcess + numLocalBlocks );
 }
 
-std::map< blockforest::BlockID, uint_t > getBlockIdToSequenceMapping( const PhantomBlockForest & phantomForest, const std::pair<uint_t, uint_t> & blockSequenceRange, MPI_Comm comm )
+std::map< blockforest::BlockID, uint_t > getBlockIdToSequenceMapping( const PhantomBlockForest& phantomForest, const std::vector< std::pair< const PhantomBlock *, uint_t > > & targetProcess, const std::pair<uint_t, uint_t> & blockSequenceRange, MPI_Comm comm )
 {
    std::map< blockforest::BlockID, uint_t > mapping;
 
-   const auto & blockMap = phantomForest.getBlockMap();
    uint_t sequenceId = blockSequenceRange.first;
-   for( auto it = blockMap.begin(); it != blockMap.end(); ++it )
-      mapping.insert( std::make_pair( it->first, sequenceId++ ) );
+   for( auto it = targetProcess.begin(); it != targetProcess.end(); ++it )
+      mapping.insert( std::make_pair( it->first->getId(), sequenceId++ ) );
    WALBERLA_ASSERT_EQUAL( sequenceId, blockSequenceRange.second );
 
    const std::vector<uint_t> neighborProcesses = phantomForest.getNeighboringProcesses();
@@ -68,7 +75,11 @@ std::map< blockforest::BlockID, uint_t > getBlockIdToSequenceMapping( const Phan
    mpi::BufferSystem bs( comm );
 
    for( auto it = neighborProcesses.begin(); it != neighborProcesses.end(); ++it )
-      bs.sendBuffer( mpi::translateRank(mpi::MPIManager::instance()->comm(), comm, int_c(*it)) ) << mapping;
+   {
+      auto destRank = mpi::translateRank(mpi::MPIManager::instance()->comm(), comm, int_c(*it));
+      if (destRank != -1)
+         bs.sendBuffer( destRank ) << mapping;
+   }
 
    bs.setReceiverInfoFromSendBufferState( false, true );
 
@@ -101,6 +112,8 @@ T * ptr( std::vector<T> & v )
 
 typedef uint_t idx_t;
 
+
+
 bool DynamicParMetis::operator()( std::vector< std::pair< const PhantomBlock *, uint_t > > & targetProcess,
                                   std::set< uint_t > & processesToRecvFrom,
                                   const PhantomBlockForest & phantomForest,
@@ -111,11 +124,11 @@ bool DynamicParMetis::operator()( std::vector< std::pair< const PhantomBlock *,
    globalTimer.start();
 
    //create new communicator which excludes processes which do not have blocks
-   MPI_Comm subComm;
+   MPI_Comm subComm = MPI_COMM_NULL;
    MPI_Group allGroup, subGroup;
    MPI_Comm_group( MPIManager::instance()->comm(), &allGroup );
    std::vector<int> ranks;
-   if (phantomForest.getNumberOfBlocks() > 0)
+   if (targetProcess.size() > 0)
       ranks.push_back( MPIManager::instance()->rank() );
    ranks = mpi::allGatherv( ranks, MPIManager::instance()->comm() );
    auto numSubProcesses = ranks.size();
@@ -123,22 +136,51 @@ bool DynamicParMetis::operator()( std::vector< std::pair< const PhantomBlock *,
    MPI_Group_incl(allGroup, int_c(ranks.size()), &ranks[0], &subGroup);
    MPI_Comm_create( MPIManager::instance()->comm(), subGroup, &subComm);
 
+   if ( targetProcess.size() != 0)
+   {
+      int subRank;
+      int subSize;
+      MPI_Comm_rank(subComm, &subRank);
+      MPI_Comm_size(subComm, &subSize);
+      WALBERLA_CHECK_GREATER_EQUAL(subRank, 0);
+      WALBERLA_CHECK_LESS(subRank, subSize);
+   } else
+   {
+      int subRank;
+      MPI_Comm_rank(subComm, &subRank);
+      WALBERLA_CHECK_EQUAL(subRank, MPI_UNDEFINED);
+   }
+
    int64_t edgecut = 0;
-   std::vector<int64_t> part( phantomForest.getNumberOfBlocks(), int64_c( MPIManager::instance()->rank() ) );
+   WALBERLA_CHECK_EQUAL( phantomForest.getNumberOfBlocks(), targetProcess.size() );
+   std::vector<int64_t> part( targetProcess.size(), int64_c( MPIManager::instance()->rank() ) );
 
    if (subComm != MPI_COMM_NULL)
    {
-      const std::pair<uint_t, uint_t> blockSequenceRange = getBlockSequenceRange( phantomForest, subComm );
-      const std::map< blockforest::BlockID, uint_t > mapping = getBlockIdToSequenceMapping( phantomForest, blockSequenceRange, subComm );
+      int subRank;
+      int subSize;
+      MPI_Comm_rank(subComm, &subRank);
+      MPI_Comm_size(subComm, &subSize);
+
+      WALBERLA_CHECK_UNEQUAL(targetProcess.size(), 0);
+      const std::pair<uint_t, uint_t> blockSequenceRange = getBlockSequenceRange( targetProcess.size(), subComm );
+      const std::map< blockforest::BlockID, uint_t > mapping = getBlockIdToSequenceMapping( phantomForest, targetProcess, blockSequenceRange, subComm ); //blockid to vertex id
 
       std::vector<int64_t> vtxdist = mpi::allGather( int64_c( blockSequenceRange.second ), subComm );
       vtxdist.insert( vtxdist.begin(), uint_t( 0 ) );
+      WALBERLA_CHECK_EQUAL( vtxdist.size(), subSize + 1 );
+      for (size_t i = 1; i < vtxdist.size(); ++i)
+      {
+         WALBERLA_ASSERT_LESS( vtxdist[i-1], vtxdist[i] );
+      }
 
       std::vector<int64_t> adjncy, xadj, vsize, vwgt, adjwgt;
       std::vector<double> xyz;
 
-      for( auto it = targetProcess.begin(); it != targetProcess.end(); ++it )
+      uint_t blockIndex = 0;
+      for( auto it = targetProcess.begin(); it != targetProcess.end(); ++it, ++blockIndex )
       {
+         WALBERLA_CHECK_EQUAL(blockIndex, mapping.find(it->first->getId())->second - blockSequenceRange.first);
          xadj.push_back( int64_c( adjncy.size() ) );
          const PhantomBlock & block = *( it->first );
          auto bi = block.getData< DynamicParMetisBlockInfo >();
@@ -150,9 +192,12 @@ bool DynamicParMetis::operator()( std::vector< std::pair< const PhantomBlock *,
             {
                auto mapIt = mapping.find( nit->getId() );
                WALBERLA_ASSERT_UNEQUAL( mapIt, mapping.end(), "BlockId of neighbor is not contained in sequence mapping!" );
+               WALBERLA_CHECK_GREATER_EQUAL( mapIt->second, 0 );
+               WALBERLA_CHECK_LESS( mapIt->second, vtxdist.back() );
                adjncy.push_back( int64_c( mapIt->second ) );
                auto edgeWeightIt = bi.getEdgeWeights().find( nit->getId() );
-               adjwgt.push_back( edgeWeightIt == bi.getEdgeWeights().end() ? int64_t( 0 ) : edgeWeightIt->second );
+               //WALBERLA_CHECK_UNEQUAL( edgeWeightIt->second, 0 ); // perhaps WARNING
+               adjwgt.push_back( edgeWeightIt == bi.getEdgeWeights().end() ? int64_t( 1 ) : edgeWeightIt->second );
             }
             break;
          case PARMETIS_EDGES_FROM_EDGE_WEIGHTS:
@@ -160,10 +205,15 @@ bool DynamicParMetis::operator()( std::vector< std::pair< const PhantomBlock *,
             {
                auto mapIt = mapping.find( edgeIt->first );
                WALBERLA_ASSERT_UNEQUAL( mapIt, mapping.end(), "BlockId of neighbor is not contained in sequence mapping!" );
+               WALBERLA_CHECK_GREATER_EQUAL( mapIt->second, 0 );
+               WALBERLA_CHECK_LESS( mapIt->second, vtxdist.back() );
                adjncy.push_back( int64_c( mapIt->second ) );
+               //WALBERLA_CHECK_UNEQUAL( edgeIt->second, 0 ); // perhaps WARNING
                adjwgt.push_back( edgeIt->second );
             }
+            break;
          }
+         WALBERLA_CHECK_UNEQUAL( bi.getVertexWeight(), 0 );
          vwgt.push_back( bi.getVertexWeight() );
          vsize.push_back( bi.getVertexSize() );
          xyz.push_back( bi.getVertexCoords()[0] );
@@ -172,45 +222,58 @@ bool DynamicParMetis::operator()( std::vector< std::pair< const PhantomBlock *,
       }
       xadj.push_back( int64_c( adjncy.size() ) );
 
-      WALBERLA_ASSERT_EQUAL( vtxdist.size(), numSubProcesses + uint_t( 1 ) );
-      WALBERLA_ASSERT_EQUAL( xadj.size(), phantomForest.getNumberOfBlocks() + 1 );
-      WALBERLA_ASSERT_EQUAL( vwgt.size(), phantomForest.getNumberOfBlocks() );
-      WALBERLA_ASSERT_EQUAL( vsize.size(), phantomForest.getNumberOfBlocks() );
-      WALBERLA_ASSERT_EQUAL( adjncy.size(), adjwgt.size() );
-
-      int64_t wgtflag = weightsToUse_;
-      int64_t numflag = 0; // C-style ordering
-      int64_t ncon = 1; // Number of constraints
-      int64_t ndims = 3; // Number of dimensions
-      double ubvec[] = { real_t( 1.05 ) }; // imbalance tolerance
-      int64_t nparts = int64_c( MPIManager::instance()->numProcesses() ); // number of subdomains
-      double ipc2redist = real_t( 1000000.0 ); // compute repartitioning with low edge cut (set lower (down to 0.000001) to get minimal repartitioning )
-      MPI_Comm comm = subComm; //MPIManager::instance()->comm();
+      WALBERLA_CHECK_EQUAL( vtxdist.size(), numSubProcesses + uint_t( 1 ) );
+      WALBERLA_CHECK_EQUAL( xadj.size(), targetProcess.size() + 1 );
+      WALBERLA_CHECK_EQUAL( xadj.front(), 0);
+      WALBERLA_CHECK_EQUAL( xadj.back(), adjncy.size() );
+      for (size_t i = 1; i < xadj.size(); ++i)
+      {
+         WALBERLA_ASSERT_LESS( xadj[i-1], xadj[i] );
+      }
+      WALBERLA_CHECK_EQUAL( vwgt.size(), targetProcess.size() );
+      WALBERLA_CHECK_EQUAL( vsize.size(), targetProcess.size() );
+      WALBERLA_CHECK_EQUAL( xyz.size(), targetProcess.size() * 3 );
+      WALBERLA_CHECK_EQUAL( adjncy.size(), adjwgt.size() );
+      WALBERLA_CHECK_EQUAL( adjwgt.size(), xadj.back() );
+
+      int64_t wgtflag         = weightsToUse_;
+      int64_t numflag         = 0; // C-style ordering
+      int64_t ncon            = 1; // Number of constraints
+      int64_t ndims           = 3; // Number of dimensions
+      std::vector<double>  ubvec( uint_c(ncon), double_c( 1.05 ) ); // imbalance tolerance
+      int64_t nparts          = int64_c( MPIManager::instance()->numProcesses() ); // number of subdomains
+      double    ipc2redist    = double_c(ipc2redist_);
+      MPI_Comm comm           = subComm; //MPIManager::instance()->comm();
       std::vector<double> tpwgts( uint_c(nparts * ncon), 1.0 / double_c( nparts ) ); // vertex weight fraction that is stored in a subdomain
-      int64_t options[] = { int64_t( 1 ), int64_t( 0 ), int64_t( 23 ), int64_t( 1 ) };
+      std::vector<int64_t> options = { int64_t( 1 ), int64_t( 0 ), int64_t( 23 ), int64_t( 1 ) };
 
       int metisResult = core::METIS_OK;
 
       switch( algorithm_ )
       {
+      case PARMETIS_PART_GEOM:
+         parmetisTimer.start();
+         metisResult = core::ParMETIS_V3_PartGeom( ptr( vtxdist ), &ndims, ptr( xyz ), ptr( part ), &comm );
+         parmetisTimer.end();
+         break;
       case PARMETIS_PART_GEOM_KWAY:
          parmetisTimer.start();
-         metisResult = core::ParMETIS_V3_PartGeomKway( ptr( vtxdist ), ptr( xadj ), ptr( adjncy ), ptr( vwgt ), ptr( adjwgt ), &wgtflag, &numflag, &ndims, ptr( xyz ), &ncon, &nparts, ptr( tpwgts ), ubvec, options, &edgecut, ptr( part ), &comm );
+         metisResult = core::ParMETIS_V3_PartGeomKway( ptr( vtxdist ), ptr( xadj ), ptr( adjncy ), ptr( vwgt ), ptr( adjwgt ), &wgtflag, &numflag, &ndims, ptr( xyz ), &ncon, &nparts, ptr( tpwgts ), ptr( ubvec ), ptr( options ), &edgecut, ptr( part ), &comm );
          parmetisTimer.end();
          break;
       case PARMETIS_PART_KWAY:
          parmetisTimer.start();
-         metisResult = core::ParMETIS_V3_PartKway( ptr( vtxdist ), ptr( xadj ), ptr( adjncy ), ptr( vwgt ), ptr( adjwgt ), &wgtflag, &numflag, &ncon, &nparts, ptr( tpwgts ), ubvec, options, &edgecut, ptr( part ), &comm );
+         metisResult = core::ParMETIS_V3_PartKway( ptr( vtxdist ), ptr( xadj ), ptr( adjncy ), ptr( vwgt ), ptr( adjwgt ), &wgtflag, &numflag, &ncon, &nparts, ptr( tpwgts ), ptr(ubvec), ptr(options), &edgecut, ptr( part ), &comm );
          parmetisTimer.end();
          break;
       case PARMETIS_ADAPTIVE_REPART:
          parmetisTimer.start();
-         metisResult = core::ParMETIS_V3_AdaptiveRepart( ptr( vtxdist ), ptr( xadj ), ptr( adjncy ), ptr( vwgt ), ptr( vsize ), ptr( adjwgt ), &wgtflag, &numflag, &ncon, &nparts, ptr( tpwgts ), ubvec, &ipc2redist, options, &edgecut, ptr( part ), &comm );
+         metisResult = core::ParMETIS_V3_AdaptiveRepart( ptr( vtxdist ), ptr( xadj ), ptr( adjncy ), ptr( vwgt ), ptr( vsize ), ptr( adjwgt ), &wgtflag, &numflag, &ncon, &nparts, ptr( tpwgts ), ptr(ubvec), &ipc2redist, ptr(options), &edgecut, ptr( part ), &comm );
          parmetisTimer.end();
          break;
       case PARMETIS_REFINE_KWAY:
          parmetisTimer.start();
-         metisResult = core::ParMETIS_V3_RefineKway( ptr( vtxdist ), ptr( xadj ), ptr( adjncy ), ptr( vwgt ), ptr( adjwgt ), &wgtflag, &numflag, &ncon, &nparts, ptr( tpwgts ), ubvec, options, &edgecut, ptr( part ), &comm );
+         metisResult = core::ParMETIS_V3_RefineKway( ptr( vtxdist ), ptr( xadj ), ptr( adjncy ), ptr( vwgt ), ptr( adjwgt ), &wgtflag, &numflag, &ncon, &nparts, ptr( tpwgts ), ptr(ubvec), ptr(options), &edgecut, ptr( part ), &comm );
          parmetisTimer.end();
          break;
       }
@@ -265,6 +328,8 @@ DynamicParMetis::Algorithm DynamicParMetis::stringToAlgorithm( std::string s )
 
    if( s == "PART_GEOM_KWAY" )
       return PARMETIS_PART_GEOM_KWAY;
+   else if( s == "PART_GEOM" )
+      return PARMETIS_PART_GEOM;
    else if( s == "PART_KWAY" )
       return PARMETIS_PART_KWAY;
    else if( s == "PART_ADAPTIVE_REPART" )
@@ -272,7 +337,7 @@ DynamicParMetis::Algorithm DynamicParMetis::stringToAlgorithm( std::string s )
    else if( s == "REFINE_KWAY" )
       return PARMETIS_REFINE_KWAY;
    else
-      WALBERLA_ABORT( "Illegal ParMetis algorithm specified! Valid choices are: \"PART_GEOM_KWAY\", \"PART_KWAY\", \"PART_ADAPTIVE_REPART\", or \"REFINE_KWAY\"." );
+      WALBERLA_ABORT( "Illegal ParMetis algorithm specified (" << s << ")! Valid choices are: \"PART_GEOM_KWAY\", \"PART_KWAY\", \"PART_ADAPTIVE_REPART\", or \"REFINE_KWAY\"." );
 }
 
 
@@ -290,7 +355,7 @@ DynamicParMetis::WeightsToUse DynamicParMetis::stringToWeightsToUse( std::string
    else if( s == "BOTH_WEIGHTS" )
       return PARMETIS_BOTH_WEIGHTS;
    else
-      WALBERLA_ABORT( "Illegal ParMetis weights usage specified! Valid choices are: \"NO_WEIGHTS\", \"EDGE_WEIGHTS\", \"VERTEX_WEIGHTS\", or \"BOTH_WEIGHTS\"." );
+      WALBERLA_ABORT( "Illegal ParMetis weights usage specified (" << s << ")! Valid choices are: \"NO_WEIGHTS\", \"EDGE_WEIGHTS\", \"VERTEX_WEIGHTS\", or \"BOTH_WEIGHTS\"." );
 }
 
 
@@ -304,7 +369,56 @@ DynamicParMetis::EdgeSource DynamicParMetis::stringToEdgeSource( std::string s )
    else if( s == "EDGES_FROM_EDGE_WEIGHTS" )
       return PARMETIS_EDGES_FROM_EDGE_WEIGHTS;
    else
-      WALBERLA_ABORT( "Illegal ParMetis weights usage specified! Valid choices are: \"EDGES_FROM_FOREST\" or \"EDGES_FROM_EDGE_WEIGHTS\"" );
+      WALBERLA_ABORT( "Illegal ParMetis weights usage specified (" << s << ")! Valid choices are: \"EDGES_FROM_FOREST\" or \"EDGES_FROM_EDGE_WEIGHTS\"" );
+}
+
+
+std::string DynamicParMetis::algorithmToString( ) const
+{
+   switch (algorithm_)
+   {
+   case walberla::blockforest::DynamicParMetis::PARMETIS_PART_GEOM_KWAY:
+      return "PART_GEOM_KWAY";
+   case walberla::blockforest::DynamicParMetis::PARMETIS_PART_GEOM:
+      return "PART_GEOM";
+   case walberla::blockforest::DynamicParMetis::PARMETIS_PART_KWAY:
+      return "PART_KWAY";
+   case walberla::blockforest::DynamicParMetis::PARMETIS_ADAPTIVE_REPART:
+      return "PART_ADAPTIVE_REPART";
+   case walberla::blockforest::DynamicParMetis::PARMETIS_REFINE_KWAY:
+      return "PARMETIS_REFINE_KWAY";
+   }
+   return "Unknown";
+}
+
+
+std::string DynamicParMetis::weightsToUseToString( ) const
+{
+   switch (weightsToUse_)
+   {
+   case walberla::blockforest::DynamicParMetis::PARMETIS_NO_WEIGHTS:
+      return "NO_WEIGHTS";
+   case walberla::blockforest::DynamicParMetis::PARMETIS_EDGE_WEIGHTS:
+      return "EDGE_WEIGHTS";
+   case walberla::blockforest::DynamicParMetis::PARMETIS_VERTEX_WEIGHTS:
+      return "VERTEX_WEIGHTS";
+   case walberla::blockforest::DynamicParMetis::PARMETIS_BOTH_WEIGHTS:
+      return "BOTH_WEIGHTS";
+   }
+   return "Unknown";
+}
+
+
+std::string DynamicParMetis::edgeSourceToString( ) const
+{
+   switch (edgeSource_)
+   {
+   case walberla::blockforest::DynamicParMetis::PARMETIS_EDGES_FROM_FOREST:
+      return "EDGES_FROM_FOREST";
+   case walberla::blockforest::DynamicParMetis::PARMETIS_EDGES_FROM_EDGE_WEIGHTS:
+      return "EDGES_FROM_EDGE_WEIGHTS";
+   }
+   return "Unknown";
 }
 
 
diff --git a/src/blockforest/loadbalancing/DynamicParMetis.h b/src/blockforest/loadbalancing/DynamicParMetis.h
index 927ca25698c5257a667674535d9838191634c1a6..d1b3acd1f877c755279f8a4b3f0d2a2586549cd7 100644
--- a/src/blockforest/loadbalancing/DynamicParMetis.h
+++ b/src/blockforest/loadbalancing/DynamicParMetis.h
@@ -23,10 +23,12 @@
 
 #include "blockforest/PhantomBlockForest.h"
 
+#include "core/debug/Debug.h"
 #include "core/DataTypes.h"
 #include "core/math/Vector3.h"
 #include "core/mpi/MPIWrapper.h"
 
+#include <cmath>
 #include <map>
 
 namespace walberla {
@@ -40,7 +42,7 @@ std::map< blockforest::BlockID, uint_t > getBlockIdToSequenceMapping( const Phan
 class DynamicParMetis
 {
 public:
-   enum Algorithm    { PARMETIS_PART_GEOM_KWAY, PARMETIS_PART_KWAY, PARMETIS_ADAPTIVE_REPART, PARMETIS_REFINE_KWAY };
+   enum Algorithm    { PARMETIS_PART_GEOM, PARMETIS_PART_GEOM_KWAY, PARMETIS_PART_KWAY, PARMETIS_ADAPTIVE_REPART, PARMETIS_REFINE_KWAY };
    enum WeightsToUse { PARMETIS_NO_WEIGHTS = 0, PARMETIS_EDGE_WEIGHTS = 1, PARMETIS_VERTEX_WEIGHTS = 2, PARMETIS_BOTH_WEIGHTS = 3 };
    enum EdgeSource   { PARMETIS_EDGES_FROM_FOREST, PARMETIS_EDGES_FROM_EDGE_WEIGHTS };
 
@@ -54,6 +56,10 @@ public:
                     const PhantomBlockForest & phantomForest,
                     const uint_t iteration ) const;
 
+
+   void   setipc2redist(double val) {ipc2redist_ = val;}
+   double getipc2redist() const {return ipc2redist_;}
+
    bool edgeWeightsUsed()   const { return ( weightsToUse_ == PARMETIS_EDGE_WEIGHTS   ) || ( weightsToUse_ == PARMETIS_BOTH_WEIGHTS ); }
    bool vertexWeightsUsed() const { return ( weightsToUse_ == PARMETIS_VERTEX_WEIGHTS ) || ( weightsToUse_ == PARMETIS_BOTH_WEIGHTS ); }
    bool vertexSizeUsed()    const { return algorithm_ == PARMETIS_ADAPTIVE_REPART; }
@@ -62,10 +68,16 @@ public:
    static WeightsToUse stringToWeightsToUse( std::string s );
    static EdgeSource   stringToEdgeSource( std::string s );
 
+   std::string algorithmToString() const;
+   std::string weightsToUseToString() const;
+   std::string edgeSourceToString() const;
+
 protected:
    Algorithm algorithm_;
    WeightsToUse weightsToUse_;
    EdgeSource edgeSource_;
+
+   double ipc2redist_ = real_t( 1000.0 ); ///< compute repartitioning with low edge cut (set lower (down to 0.000001) to get minimal repartitioning )
 };
 
 class DynamicParMetisBlockInfo
@@ -90,7 +102,7 @@ public:
    vsize_t getVertexSize() const { return vertexSize_; }
    void setVertexSize( const vsize_t size ) { vertexSize_ = size; }
 
-   const Vector3<double> & getVertexCoords() const { return vertexCoords_; }
+   const Vector3<double> & getVertexCoords() const { WALBERLA_ASSERT( !std::isnan(vertexCoords_[0]) && !std::isnan(vertexCoords_[1]) && !std::isnan(vertexCoords_[2]) ); return vertexCoords_; }
    void setVertexCoords( const Vector3<double> & p ) { vertexCoords_ = p; }
 
    void setEdgeWeight( const blockforest::BlockID & blockId, const weight_t edgeWeight ) { edgeWeights_[blockId] = edgeWeight; }
@@ -107,7 +119,7 @@ private:
 
    weight_t vertexWeight_; /// Defines the weight of a block
    vsize_t vertexSize_;    /// Defines the cost of rebalancing a block
-   Vector3<double> vertexCoords_; /// Defines where the block is located in space. Needed by some ParMetis algorithms
+   Vector3<double> vertexCoords_ = Vector3<double>(std::numeric_limits<double>::signaling_NaN()); /// Defines where the block is located in space. Needed by some ParMetis algorithms
    std::map< blockforest::BlockID, weight_t > edgeWeights_; /// Defines the cost of communication with other blocks
 };
 
diff --git a/src/core/mpi/MPIHelper.cpp b/src/core/mpi/MPIHelper.cpp
index 8822c198d4ba19d6f76005c8a34aa6d374245970..94f239a90e20e36fe35d7193976206b0e1add64e 100644
--- a/src/core/mpi/MPIHelper.cpp
+++ b/src/core/mpi/MPIHelper.cpp
@@ -20,28 +20,67 @@
 
 #include "MPIHelper.h"
 
+#include <core/debug/CheckFunctions.h>
+
 namespace walberla {
 namespace mpi {
 
+//!
+//! \brief This functions maps the rank in one communicator to the rank in another communicator.
+//! \param srcComm source communicator
+//! \param destComm destination communicator
+//! \param srcRank rank in the source communicator
+//! \return rank in the destination communicator or -1 if not available
+//!
 int translateRank(const MPI_Comm srcComm, const MPI_Comm destComm, const int srcRank)
 {
+   if (srcComm == destComm)
+   {
+      return srcRank;
+   }
+
    int destRank = -1;
    MPI_Group srcGroup, destGroup;
    MPI_Comm_group(srcComm, &srcGroup);
    MPI_Comm_group(destComm, &destGroup);
    MPI_Group_translate_ranks(srcGroup, 1, const_cast<int*>(&srcRank), destGroup, &destRank);
+   int size;
+   MPI_Comm_size(destComm, &size);
+   if (destRank == MPI_UNDEFINED) destRank = -1;
+   WALBERLA_CHECK_GREATER_EQUAL(destRank, -1);
+   WALBERLA_CHECK_LESS(destRank, size);
    MPI_Group_free(&srcGroup);
    MPI_Group_free(&destGroup);
    return destRank;
 }
 
+//!
+//! \brief This functions converts a array of ranks in one communicator to an array of ranks in another communicator.
+//! \param srcComm source communicator
+//! \param destComm destination communicator
+//! \param srcRank source ranks
+//! \return converted ranks, -1 if not available
+//!
 std::vector<int> translateRank(const MPI_Comm srcComm, const MPI_Comm destComm, const std::vector<int>& srcRank)
 {
+   if (srcComm == destComm)
+   {
+      return srcRank;
+   }
+
    std::vector<int> destRank(srcRank.size(), -1);
    MPI_Group srcGroup, destGroup;
    MPI_Comm_group(srcComm, &srcGroup);
    MPI_Comm_group(destComm, &destGroup);
    MPI_Group_translate_ranks(srcGroup, int_c(srcRank.size()), const_cast<int*>(&srcRank[0]), destGroup, &destRank[0]);
+   int size;
+   MPI_Comm_size(destComm, &size);
+   for (auto& dstRnk : destRank)
+   {
+      if (dstRnk == MPI_UNDEFINED) dstRnk = -1;
+      WALBERLA_CHECK_GREATER_EQUAL(dstRnk, -1);
+      WALBERLA_CHECK_LESS(dstRnk, size);
+   }
    MPI_Group_free(&srcGroup);
    MPI_Group_free(&destGroup);
    return destRank;
diff --git a/src/pe/amr/weight_assignment/MetisAssignmentFunctor.h b/src/pe/amr/weight_assignment/MetisAssignmentFunctor.h
index bed28616694dbdbff30914cba828d0ed22c6d6fe..720c80a2e363302f6b60a582c7628000cada10ea 100644
--- a/src/pe/amr/weight_assignment/MetisAssignmentFunctor.h
+++ b/src/pe/amr/weight_assignment/MetisAssignmentFunctor.h
@@ -35,26 +35,38 @@ public:
    typedef blockforest::DynamicParMetisBlockInfo           PhantomBlockWeight;
    typedef blockforest::DynamicParMetisBlockInfoPackUnpack PhantomBlockWeightPackUnpackFunctor;
 
-   MetisAssignmentFunctor( const shared_ptr<InfoCollection>& ic ) : ic_( ic )
-   {}
+   MetisAssignmentFunctor( shared_ptr<InfoCollection>& ic, const real_t baseWeight = real_t(10.0) ) : ic_(ic), baseWeight_(baseWeight) {}
 
    void operator()( std::vector< std::pair< const PhantomBlock *, walberla::any > > & blockData, const PhantomBlockForest & )
    {
       for( auto it = blockData.begin(); it != blockData.end(); ++it )
       {
-         const uint_t& weight = ic_->find( it->first->getId() )->second.numberOfLocalBodies;
-         blockforest::DynamicParMetisBlockInfo info( int64_c(weight) );
-         info.setVertexSize(int64_c( weight ));
+         const double weight     = double_c( ic_->find( it->first->getId() )->second.numberOfLocalBodies ) + baseWeight_;
+         //const double commWeight = double_c( edgeWeightFactor * (double_c(ic_->find( it->first->getId() )->second.numberOfShadowBodies) + baseWeight_)) + 1;
+         blockforest::DynamicParMetisBlockInfo info( 0 );
+         info.setVertexWeight( int64_c(weight) );
+         info.setVertexSize( int64_c( weight ) );
+         info.setVertexCoords( it->first->getAABB().center() );
          for( uint_t nb = uint_t(0); nb < it->first->getNeighborhoodSize(); ++nb )
          {
-            info.setEdgeWeight(it->first->getNeighborId(nb), int64_c(weight) );
+            info.setEdgeWeight(it->first->getNeighborId(nb), int64_c(edgeWeight_) );
          }
          it->second = info;
       }
    }
 
+   inline void   setBaseWeight( const double weight) { baseWeight_ = weight;}
+   inline double getBaseWeight() const { return baseWeight_; }
+
+   inline void   setEdgeWeight( const double weight) { edgeWeight_ = weight;}
+   inline double getEdgeWeight() const { return edgeWeight_; }
+
 private:
    shared_ptr< InfoCollection > ic_;
+
+   ///Base weight due to allocated data structures. A weight of zero for blocks is dangerous as empty blocks might accumulate on one process!
+   double baseWeight_ = 10.0;
+   double edgeWeight_ = 1.0;
 };
 
 }
diff --git a/src/pe/cr/DEM.h b/src/pe/cr/DEM.h
index 9d0887e774253fcfbb7c76e4ded484a30abbd843..dcc19af292c646addf2f0eac477d3c87e5a4218f 100644
--- a/src/pe/cr/DEM.h
+++ b/src/pe/cr/DEM.h
@@ -79,7 +79,7 @@ private:
    size_t                            numberOfContactsTreated_;
 };
 
-class DEM : public DEMSolver<IntegrateImplictEuler, ResolveContactSpringDashpotHaffWerner>
+class DEM : public DEMSolver<IntegrateImplicitEuler, ResolveContactSpringDashpotHaffWerner>
 {
 public:
    DEM(  const shared_ptr<BodyStorage>&    globalBodyStorage
@@ -88,8 +88,8 @@ public:
        , domain_decomposition::BlockDataID ccdID
        , domain_decomposition::BlockDataID fcdID
        , WcTimingTree*                     tt = NULL)
-   : DEMSolver<IntegrateImplictEuler, ResolveContactSpringDashpotHaffWerner>(
-              IntegrateImplictEuler(), ResolveContactSpringDashpotHaffWerner(),
+   : DEMSolver<IntegrateImplicitEuler, ResolveContactSpringDashpotHaffWerner>(
+              IntegrateImplicitEuler(), ResolveContactSpringDashpotHaffWerner(),
               globalBodyStorage, blockStorage, storageID, ccdID, fcdID, tt )
    {
    }
diff --git a/src/pe/cr/DEM.impl.h b/src/pe/cr/DEM.impl.h
index c64669ba210b48ca9f8101d7c09883c10405cc73..eb3a6b4e325616444808d21cf8d26db9fe9cc364 100644
--- a/src/pe/cr/DEM.impl.h
+++ b/src/pe/cr/DEM.impl.h
@@ -128,7 +128,7 @@ void DEMSolver<Integrator,ContactResolver>::timestep( real_t dt )
          WALBERLA_ASSERT( bodyIt->checkInvariants(), "Invalid body state detected" );
          WALBERLA_ASSERT( !bodyIt->hasSuperBody(), "Invalid superordinate body detected" );
          
-         // Moving the capsule according to the acting forces (don't move a sleeping capsule)
+         // Moving the body according to the acting forces (don't move a sleeping body)
          if( bodyIt->isAwake() && !bodyIt->hasInfiniteMass() )
          {
             integrate_( *bodyIt, dt, *this );
@@ -138,7 +138,7 @@ void DEMSolver<Integrator,ContactResolver>::timestep( real_t dt )
          bodyIt->resetForceAndTorque();
          
          // Checking the state of the rigid body
-         WALBERLA_ASSERT( bodyIt->checkInvariants(), "Invalid capsule state detected" );
+         WALBERLA_ASSERT( bodyIt->checkInvariants(), "Invalid body state detected" );
 
          // Resetting the acting forces
          bodyIt->resetForceAndTorque();
diff --git a/src/pe/cr/Integrators.h b/src/pe/cr/Integrators.h
index 30b56e50a9e308917872d03156447d9e3b520157..1ea365f6661f542c73fde4ad31255cb5b18c2097 100644
--- a/src/pe/cr/Integrators.h
+++ b/src/pe/cr/Integrators.h
@@ -30,7 +30,7 @@ namespace pe {
 namespace cr {
 
 //*************************************************************************************************
-/*!\brief Integrate the trajectory of one body using implict Euler.
+/*!\brief Integrate the trajectory of one body using implicit Euler.
 *
 * \param id Body ID.
 * \param dt Time step size.
@@ -40,7 +40,7 @@ namespace cr {
 * The implicit Euler algorithm, also known as backward Euler, is used. It is a first-order
 * integrator that does conserves energy (i.e. it is symplectic.)
 */
-class IntegrateImplictEuler {
+class IntegrateImplicitEuler {
 public:
    void operator()( BodyID id, real_t dt, ICR & solver ) const
    {
@@ -72,7 +72,7 @@ public:
       // Setting the axis-aligned bounding box
       id->calcBoundingBox();
 
-      // Calculating the current motion of the capsule
+      // Calculating the current motion of the body
       id->calcMotion();
    }
 };
@@ -88,7 +88,7 @@ public:
 * The explicit Euler algorithm, also known as forward Euler, is used. It is a first-order
 * integrator that does not conserve energy (i.e. it is not symplectic.)
 */
-class IntegrateExplictEuler {
+class IntegrateExplicitEuler {
 public:
    void operator()( BodyID id, real_t dt, ICR & solver ) const
    {
@@ -120,7 +120,7 @@ public:
       // Setting the axis-aligned bounding box
       id->calcBoundingBox();
 
-      // Calculating the current motion of the capsule
+      // Calculating the current motion of the body
       id->calcMotion();
    }
 };
diff --git a/src/pe/cr/PlainIntegrator.h b/src/pe/cr/PlainIntegrator.h
index 271847f5d3d9b601452553d08d4a85bae8a4eb71..92abbbfb3746b59f08927fb9222b4b72bd00045f 100644
--- a/src/pe/cr/PlainIntegrator.h
+++ b/src/pe/cr/PlainIntegrator.h
@@ -61,14 +61,14 @@ private:
    WcTimingTree*                     tt_;
 };
    
-class PlainIntegrator : public PlainIntegratorSolver<IntegrateImplictEuler>
+class PlainIntegrator : public PlainIntegratorSolver<IntegrateImplicitEuler>
 {
 public:
    PlainIntegrator(  const shared_ptr<BodyStorage>&    globalBodyStorage
                    , const shared_ptr<BlockStorage>&   blockStorage
                    , domain_decomposition::BlockDataID storageID
                    , WcTimingTree*                     tt = NULL)
-   : PlainIntegratorSolver<IntegrateImplictEuler>( IntegrateImplictEuler(), globalBodyStorage, blockStorage,
+   : PlainIntegratorSolver<IntegrateImplicitEuler>( IntegrateImplicitEuler(), globalBodyStorage, blockStorage,
                                                    storageID, tt )
    {
    }
diff --git a/src/pe/cr/PlainIntegrator.impl.h b/src/pe/cr/PlainIntegrator.impl.h
index 7b9c4ce45101e774ed936da68e81caab088f1f95..b96a6e2e0e46cac743062832c3ee4e8fc19f906b 100644
--- a/src/pe/cr/PlainIntegrator.impl.h
+++ b/src/pe/cr/PlainIntegrator.impl.h
@@ -65,14 +65,14 @@ void PlainIntegratorSolver<Integrator>::timestep( const real_t dt )
       BodyStorage& localStorage = (*storage)[0];
       for (auto bd = localStorage.begin(); bd != localStorage.end(); ++bd){
          // Checking the state of the body
-         WALBERLA_ASSERT( bd->checkInvariants(), "Invalid capsule state detected" );
+         WALBERLA_ASSERT( bd->checkInvariants(), "Invalid body state detected" );
          WALBERLA_ASSERT( !bd->hasSuperBody(), "Invalid superordinate body detected" );
 
          // Resetting the contact node and removing all attached contacts
       //      bd->resetNode();
          bd->clearContacts();
 
-         // Moving the capsule according to the acting forces (don't move a sleeping capsule)
+         // Moving the body according to the acting forces (don't move a sleeping body)
          if( bd->isAwake() && !bd->hasInfiniteMass() ) {
             integrate_( *bd, dt, *this );
          }
@@ -80,8 +80,8 @@ void PlainIntegratorSolver<Integrator>::timestep( const real_t dt )
          // Resetting the acting forces
          bd->resetForceAndTorque();
 
-         // Checking the state of the capsule
-         WALBERLA_ASSERT( bd->checkInvariants(), "Invalid capsule state detected" );
+         // Checking the state of the body
+         WALBERLA_ASSERT( bd->checkInvariants(), "Invalid body state detected" );
       }
    }
    if (tt_!=NULL) tt_->stop("Integrate Bodies");
diff --git a/src/pe/rigidbody/CylindricalBoundary.cpp b/src/pe/rigidbody/CylindricalBoundary.cpp
index 6b99c946384f11f2ecc5cd9f3d88a4e65956622d..66f639e49015a66ab41caf851690e678149c507b 100644
--- a/src/pe/rigidbody/CylindricalBoundary.cpp
+++ b/src/pe/rigidbody/CylindricalBoundary.cpp
@@ -56,7 +56,7 @@ namespace pe {
 CylindricalBoundary::CylindricalBoundary( id_t sid, id_t uid, const Vec3& gpos, const real_t radius,
                                           MaterialID material )
    : GeomPrimitive( getStaticTypeID(), sid, uid, material )           // Initializing the base object
-   , radius_(radius)                                                  // Radius of the cylinder                                                // Length of the capsule
+   , radius_(radius)                                                  // Radius of the cylinder
 {
    //boundaries are always considered locally and have infinite mass
    setGlobal( true );
@@ -66,11 +66,11 @@ CylindricalBoundary::CylindricalBoundary( id_t sid, id_t uid, const Vec3& gpos,
 
    // Checking the radius
    // Since the constructor is never directly called but only used in a small number
-   // of functions that already check the capsule arguments, only asserts are used here to
+   // of functions that already check the cylinder arguments, only asserts are used here to
    // double check the arguments.
-   WALBERLA_ASSERT_GREATER( radius, real_t(0), "Invalid capsule radius"  );
+   WALBERLA_ASSERT_GREATER( radius, real_t(0), "Invalid cylinder radius"  );
 
-   // Initializing the instantiated capsule
+   // Initializing the instantiated cylinder
    gpos_   = gpos;
    q_      = Quat();                 // Setting the orientation
    R_      = q_.toRotationMatrix();  // Setting the rotation matrix
diff --git a/src/pe/rigidbody/CylindricalBoundary.h b/src/pe/rigidbody/CylindricalBoundary.h
index 4c095c1a20bae85959c756a856500ec7b4332b93..5d9b4c9d5da23922f0858500e065b25ca884a6f0 100644
--- a/src/pe/rigidbody/CylindricalBoundary.h
+++ b/src/pe/rigidbody/CylindricalBoundary.h
@@ -175,7 +175,7 @@ inline id_t CylindricalBoundary::getStaticTypeID()
 //=================================================================================================
 
 //*************************************************************************************************
-/*!\name Capsule operators */
+/*!\name Cylinder operators */
 //@{
 std::ostream& operator<<( std::ostream& os, const CylindricalBoundary& cb );
 std::ostream& operator<<( std::ostream& os, CylindricalBoundaryID cb );
diff --git a/src/python_coupling/Manager.cpp b/src/python_coupling/Manager.cpp
index 264ba49b6eaa356cac7579415bfea13c5bb01d24..9ccf4a597e06b5a80f0346702484112ecdbd41f1 100644
--- a/src/python_coupling/Manager.cpp
+++ b/src/python_coupling/Manager.cpp
@@ -75,11 +75,9 @@ void Manager::addPath( const std::string & path )
 {
    WALBERLA_ASSERT( initialized_ );
 
-   object main_module  = import("__main__");
-   dict globals = extract<dict>( main_module.attr( "__dict__" ) );
-   exec( "import sys", globals );
-   std::string pathCommand = std::string ( "sys.path.append( \"") + path + "\" ) ";
-   exec( str(pathCommand), globals );
+   object sys = import("sys");
+   list sys_path = extract<list>( sys.attr("path") );
+   sys_path.append(path);
 }
 
 void Manager::triggerInitialization()
@@ -116,10 +114,24 @@ void Manager::triggerInitialization()
 
    }
    catch ( boost::python::error_already_set & ) {
-      PyErr_Print();
+      PyObject *type_ptr = NULL, *value_ptr = NULL, *traceback_ptr = NULL;
+      PyErr_Fetch(&type_ptr, &value_ptr, &traceback_ptr);
+
+      if( type_ptr )
+      {
+         extract<std::string> type_str(( str( handle<>( type_ptr ) ) ));
+         if( type_str.check() )
+            WALBERLA_LOG_DEVEL( type_str() );
+      }
+      if(value_ptr)
+      {
+         extract<std::string> value_str(( str( handle<>( value_ptr ) ) ));
+         if ( value_str.check() )
+            WALBERLA_LOG_DEVEL( value_str() );
+      }
+
       WALBERLA_ABORT( "Error while initializing Python" );
    }
-
 }
 
 
@@ -159,4 +171,3 @@ int someSymbolSoThatLinkerDoesNotComplain=0;
 
 
 
-
diff --git a/src/waLBerlaDefinitions.in.h b/src/waLBerlaDefinitions.in.h
index 26b6627685ab451ae693fb32f8af4dfe0080d8aa..563ec863a1c6694b69dab1efd3b5cca2253aaac7 100644
--- a/src/waLBerlaDefinitions.in.h
+++ b/src/waLBerlaDefinitions.in.h
@@ -31,6 +31,8 @@
 
 #cmakedefine WALBERLA_BUILD_WITH_CUDA
 
+#cmakedefine WALBERLA_BUILD_WITH_CODEGEN
+
 #cmakedefine WALBERLA_BUFFER_DEBUG
 
 #cmakedefine WALBERLA_THREAD_SAFE_LOGGING
diff --git a/tests/pe/CMakeLists.txt b/tests/pe/CMakeLists.txt
index 64a077d081657246870a0bbac6583cc58bb8acb0..ce10e244c19ea13befd35797f6ce1d7e87991499 100644
--- a/tests/pe/CMakeLists.txt
+++ b/tests/pe/CMakeLists.txt
@@ -69,6 +69,11 @@ waLBerla_execute_test( NAME   PE_OVERLAP )
 waLBerla_compile_test( NAME   PE_PARALLELEQUIVALENCE FILES ParallelEquivalence.cpp DEPENDS core blockforest  )
 waLBerla_execute_test( NAME   PE_PARALLELEQUIVALENCE PROCESSES 4 )
 
+if( WALBERLA_BUILD_WITH_PARMETIS )
+   waLBerla_compile_test( NAME   PE_PARMETIS FILES ParMetis.cpp DEPENDS core blockforest  )
+   waLBerla_execute_test( NAME   PE_PARMETIS PROCESSES 64 )
+endif()
+
 waLBerla_compile_test( NAME   PE_PARSEMESSAGE FILES ParseMessage.cpp DEPENDS core  )
 waLBerla_execute_test( NAME   PE_PARSEMESSAGE )
 
diff --git a/tests/pe/ParMetis.cpp b/tests/pe/ParMetis.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e35a8cb2336015a2f6aa3f925a1c4a74c9ed263b
--- /dev/null
+++ b/tests/pe/ParMetis.cpp
@@ -0,0 +1,144 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file ParMetis.cpp
+//! \author Sebastian Eibl <sebastian.eibl@fau.de>
+//
+//======================================================================================================================
+
+#include "pe/utility/CreateWorld.h"
+
+#include <blockforest/loadbalancing/DynamicParMetis.h>
+#include <core/Environment.h>
+#include <core/logging/Logging.h>
+#include <core/math/Sample.h>
+
+using namespace walberla;
+using namespace walberla::pe;
+
+class ReGrid
+{
+public:
+   void operator()( std::vector< std::pair< const Block *, uint_t > > & minTargetLevels,
+                    std::vector< const Block * > &, const BlockForest & /*forest*/ )
+   {
+      std::for_each( minTargetLevels.begin(),
+                     minTargetLevels.end(),
+                     [](auto& pair){pair.second = pair.first->getLevel() + 1;} );
+   }
+
+};
+
+class MetisAssignmentFunctor
+{
+public:
+
+   typedef blockforest::DynamicParMetisBlockInfo           PhantomBlockWeight;
+   typedef blockforest::DynamicParMetisBlockInfoPackUnpack PhantomBlockWeightPackUnpackFunctor;
+
+   void operator()( std::vector< std::pair< const PhantomBlock *, walberla::any > > & blockData, const PhantomBlockForest & )
+   {
+      for( auto it = blockData.begin(); it != blockData.end(); ++it )
+      {
+         const auto&   corner = it->first->getAABB().maxCorner();
+         const int weight     = int_c( corner[0] + corner[1] + corner[2] );
+         blockforest::DynamicParMetisBlockInfo info(0);
+         info.setVertexWeight( weight );
+         info.setVertexSize( weight );
+         info.setVertexCoords( it->first->getAABB().center() );
+         for( uint_t nb = uint_t(0); nb < it->first->getNeighborhoodSize(); ++nb )
+         {
+            info.setEdgeWeight(it->first->getNeighborId(nb), int64_c(weight) );
+         }
+         it->second = info;
+      }
+   }
+};
+
+int parmetisTest(const std::string& algorithm,
+                 const std::string& weightsToUse,
+                 const std::string& edgeSource)
+{
+   walberla::MPIManager::instance()->resetMPI();
+   walberla::MPIManager::instance()->useWorldComm();
+
+   WALBERLA_LOG_INFO_ON_ROOT("****** " << algorithm << " | " << weightsToUse << " | " << edgeSource);
+
+   // create forest
+   shared_ptr< BlockForest > forest   = createBlockForest( math::AABB(0,0,0,40,40,40),
+                                                           Vector3<uint_t>(4, 4, 4),
+                                                           Vector3<bool>(false, false, false),
+                                                           64,
+                                                           0);
+   if (!forest)
+   {
+      WALBERLA_LOG_INFO_ON_ROOT( "No BlockForest created ... exiting!");
+      return EXIT_SUCCESS;
+   }
+
+   forest->setRefreshMinTargetLevelDeterminationFunction( ReGrid() );
+
+   auto assFunc = MetisAssignmentFunctor();
+   forest->setRefreshPhantomBlockDataAssignmentFunction( assFunc );
+   forest->setRefreshPhantomBlockDataPackFunction( MetisAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor() );
+   forest->setRefreshPhantomBlockDataUnpackFunction( MetisAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor() );
+
+   auto alg     = blockforest::DynamicParMetis::stringToAlgorithm(    algorithm );
+   auto vWeight = blockforest::DynamicParMetis::stringToWeightsToUse( weightsToUse );
+   auto eWeight = blockforest::DynamicParMetis::stringToEdgeSource(   edgeSource );
+
+   auto prepFunc = blockforest::DynamicParMetis( alg, vWeight, eWeight );
+   forest->setRefreshPhantomBlockMigrationPreparationFunction( prepFunc );
+
+   forest->refresh();
+
+   math::Sample numBlocks;
+   numBlocks.castToRealAndInsert( forest->size() );
+   numBlocks.mpiGatherRoot();
+   WALBERLA_LOG_INFO_ON_ROOT("#blocks: " << numBlocks.format() );
+
+   int weight = 0;
+   for (const auto& block : *forest)
+   {
+      const auto&   corner = block.getAABB().maxCorner();
+      weight    += int_c( corner[0] + corner[1] + corner[2] );
+   }
+   math::Sample weightSample;
+   weightSample.castToRealAndInsert( weight );
+   weightSample.mpiGatherRoot();
+   WALBERLA_LOG_INFO_ON_ROOT("#weights: " << weightSample.format() );
+
+   return EXIT_SUCCESS;
+}
+
+int main( int argc, char ** argv )
+{
+   walberla::debug::enterTestMode();
+   walberla::MPIManager::instance()->initializeMPI( &argc, &argv );
+   walberla::MPIManager::instance()->useWorldComm();
+
+   std::vector<std::string> algs = {"PART_GEOM", "PART_GEOM_KWAY", "PART_KWAY", "PART_ADAPTIVE_REPART", "REFINE_KWAY"};
+   std::vector<std::string> wtu  = {"NO_WEIGHTS", "EDGE_WEIGHTS", "VERTEX_WEIGHTS", "BOTH_WEIGHTS"};
+   std::vector<std::string> es   = {"EDGES_FROM_FOREST", "EDGES_FROM_EDGE_WEIGHTS"};
+
+   for (const auto& a : algs)
+      for (const auto& w : wtu)
+         for (const auto& e : es)
+         {
+            parmetisTest(a,w,e);
+         }
+
+   return EXIT_SUCCESS;
+}
diff --git a/tests/pe_coupling/momentum_exchange_method/SettlingSphereMEMDynamicRefinement.cpp b/tests/pe_coupling/momentum_exchange_method/SettlingSphereMEMDynamicRefinement.cpp
index 44b17eb70ee97b509d096e0b09133d97bc40efc8..0e44a2e57f402bee733f69a1d9ad9b00656056cc 100644
--- a/tests/pe_coupling/momentum_exchange_method/SettlingSphereMEMDynamicRefinement.cpp
+++ b/tests/pe_coupling/momentum_exchange_method/SettlingSphereMEMDynamicRefinement.cpp
@@ -869,9 +869,18 @@ int main( int argc, char **argv )
       {
          auto & forest = blocks->getBlockForest();
          pe::createWithNeighborhood(forest, bodyStorageID, *peInfoCollection);
-         pe::clearSynchronization( blockforest, bodyStorageID);
 
+         uint_t stampBefore = blocks->getBlockForest().getModificationStamp();
          blocks->refresh();
+         uint_t stampAfter = blocks->getBlockForest().getModificationStamp();
+
+         if(stampBefore == stampAfter)
+         {
+            // nothing has changed
+            continue;
+         }
+
+         pe::clearSynchronization( blockforest, bodyStorageID);
 
          for( uint_t syncStep = 0; syncStep < uint_c(diameter / real_c(minBlockSizeInCells)) + 1; ++syncStep)
             syncCall();