From c2a1f6d295591ddeac266e3a6c7742262fa186cc Mon Sep 17 00:00:00 2001
From: Samuel Kemmler <samuel.kemmler@fau.de>
Date: Wed, 26 Jul 2023 09:51:19 +0200
Subject: [PATCH] Add mpi-ext.h for MPIX_CUDA_AWARE_SUPPORT

---
 .clang-format                                 | 4 ++--
 src/core/mpi/MPIWrapper.h                     | 3 +++
 src/gpu/communication/NonUniformGPUScheme.h   | 7 ++++---
 src/gpu/communication/UniformGPUScheme.impl.h | 5 ++++-
 4 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/.clang-format b/.clang-format
index 5f0ff6558..e5114ffd0 100644
--- a/.clang-format
+++ b/.clang-format
@@ -61,8 +61,6 @@ IncludeCategories:
     Priority:        3
   - Regex:           '^"core/'
     Priority:        4
-  - Regex:           '^"cuda/'
-    Priority:        5
   - Regex:           '^"domain_decomposition/'
     Priority:        6
   - Regex:           '^"executiontree/'
@@ -75,6 +73,8 @@ IncludeCategories:
     Priority:        10
   - Regex:           '^"geometry/'
     Priority:        11
+  - Regex:           '^"gpu/'
+    Priority:        12
   - Regex:           '^"gui/'
     Priority:        12
   - Regex:           '^"lbm/'
diff --git a/src/core/mpi/MPIWrapper.h b/src/core/mpi/MPIWrapper.h
index 6b406c631..51ab22e26 100644
--- a/src/core/mpi/MPIWrapper.h
+++ b/src/core/mpi/MPIWrapper.h
@@ -66,6 +66,9 @@ namespace mpistubs {
 #   pragma GCC diagnostic ignored "-Wsign-conversion"
 #endif
 #include <mpi.h>
+#if defined(OPEN_MPI) && OPEN_MPI
+#include <mpi-ext.h>
+#endif
 #if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
 #   pragma GCC diagnostic pop
 #endif
diff --git a/src/gpu/communication/NonUniformGPUScheme.h b/src/gpu/communication/NonUniformGPUScheme.h
index 1d3583a12..093ec4cad 100644
--- a/src/gpu/communication/NonUniformGPUScheme.h
+++ b/src/gpu/communication/NonUniformGPUScheme.h
@@ -30,13 +30,13 @@
 
 #include "domain_decomposition/IBlock.h"
 
-#include "stencil/Directions.h"
-
 #include "gpu/ErrorChecking.h"
 #include "gpu/GPUWrapper.h"
 #include "gpu/communication/CustomMemoryBuffer.h"
 #include "gpu/communication/GeneratedNonUniformGPUPackInfo.h"
 
+#include "stencil/Directions.h"
+
 #include <memory>
 #include <thread>
 
@@ -138,7 +138,8 @@ NonUniformGPUScheme< Stencil >::NonUniformGPUScheme(const weak_ptr< StructuredBl
 {
    WALBERLA_MPI_SECTION()
    {
-#if !(defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT)
+// Open MPI provides a compile-time check for CUDA-aware support (MPIX_CUDA_AWARE_SUPPORT)
+#if (defined(OPEN_MPI) && OPEN_MPI) && !(defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT)
       WALBERLA_CHECK(!sendDirectlyFromGPU)
 #endif
    }
diff --git a/src/gpu/communication/UniformGPUScheme.impl.h b/src/gpu/communication/UniformGPUScheme.impl.h
index 8a8616c1e..93f6dd85e 100644
--- a/src/gpu/communication/UniformGPUScheme.impl.h
+++ b/src/gpu/communication/UniformGPUScheme.impl.h
@@ -19,6 +19,8 @@
 //
 //======================================================================================================================
 
+#include "core/mpi/MPIWrapper.h"
+
 #include "gpu/ParallelStreams.h"
 
 namespace walberla {
@@ -45,7 +47,8 @@ namespace communication {
    {
       WALBERLA_MPI_SECTION()
       {
-#if !(defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT)
+// Open MPI provides a compile-time check for CUDA-aware support (MPIX_CUDA_AWARE_SUPPORT)
+#if (defined(OPEN_MPI) && OPEN_MPI) && !(defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT)
          WALBERLA_CHECK(!sendDirectlyFromGPU)
 #endif
       }
-- 
GitLab