From 76ce5c85571a2f0be590fcc2bf3ac800bcf1ba45 Mon Sep 17 00:00:00 2001
From: Markus Holzer <markus.holzer@onera.fr>
Date: Thu, 16 Jan 2025 11:01:12 +0100
Subject: [PATCH] FirstRun

---
 .../PorousMediaGPU/InitializerFunctions.cpp   | 50 +++++++--------
 .../PorousMediaGPU/InitializerFunctions.h     |  2 +-
 .../PorousMediaGPU/PorousMediaGPU.cpp         | 61 ++++++++++++++++---
 .../PorousMediaGPU/PorousMediaGPU.prm         | 13 ++--
 .../PorousMediaGPU/PorousMediaGPU.py          | 20 +++---
 python/lbmpy_walberla/packing_kernels.py      |  6 +-
 6 files changed, 96 insertions(+), 56 deletions(-)

diff --git a/apps/showcases/PorousMediaGPU/InitializerFunctions.cpp b/apps/showcases/PorousMediaGPU/InitializerFunctions.cpp
index fb285f1c0..12b2336e2 100644
--- a/apps/showcases/PorousMediaGPU/InitializerFunctions.cpp
+++ b/apps/showcases/PorousMediaGPU/InitializerFunctions.cpp
@@ -34,48 +34,42 @@ using FlagField_T  = FlagField< uint8_t >;
 
 void init_TPMS(const shared_ptr< StructuredBlockStorage >& blocks, const BlockDataID flagFieldID,
                const FlagUID boundaryFlagUID,
-               const real_t kx, const real_t ky, const real_t kz)
+               const real_t waveNumber, const real_t scaling)
 {
    const real_t parts = real_c(blocks->getDomainCellBB().xMax()) / real_c(10.0);
    const real_t start = parts * real_c(2.0);
-   const real_t end1 = parts * real_c(5.0);
-   const real_t end2 = parts * real_c(8.0);
+   const real_t end   = parts * real_c(8.0);
 
-   const real_t strut = real_c(0.0);
+   const real_t kx1 = waveNumber;
+   const real_t ky1 = waveNumber;
+   const real_t kz1 = waveNumber;
+
+   const real_t kx2 = waveNumber * scaling;
+   const real_t ky2 = waveNumber * scaling;
+   const real_t kz2 = waveNumber * scaling;
+
+   const real_t strut = real_c(1.0);
 
    for (auto& block : *blocks)
    {
       const auto flagField    = block.template getData< FlagField_T >(flagFieldID);
       const auto boundaryFlag = flagField->getOrRegisterFlag(boundaryFlagUID);
       WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(flagField, Cell globalCell;
-         blocks->transformBlockLocalToGlobalCell(globalCell, block, Cell(x, y, z));
-         if (globalCell[0] > start && globalCell[0] <= end1)
-         {
-            const real_t kxi = kx;
-            const real_t kyi = ky;
-            const real_t kzi = kz;
+      blocks->transformBlockLocalToGlobalCell(globalCell, block, Cell(x, y, z));
+      if (globalCell[0] > start && globalCell[0] <= end)
+      {
+         Vector3<real_t> p;
+         blocks->getCellCenter(p, globalCell, 0);
 
-            const real_t dist = cos(kxi * x) * sin(kyi * y) + cos(kyi * y) * sin(kzi * z) + cos(kzi * z) * sin(kxi * x);
+         const real_t dist1 = cos(kx1 * p[0]) * sin(ky1 * p[1]) + cos(ky1 * p[1]) * sin(kz1 * p[2]) + cos(kz1 * p[2]) * sin(kx1 * p[0]);
+         const real_t dist2 = cos(kx2 * p[0]) * sin(ky2 * p[1]) + cos(ky2 * p[1]) * sin(kz2 * p[2]) + cos(kz2 * p[2]) * sin(kx2 * p[0]);
 
-            if (dist >= strut)
-            {
-               addFlag(flagField->get(x, y, z), boundaryFlag);
-            }
-         }
-         if (globalCell[0] > end1 && globalCell[0] <= end2)
+         if ( (dist1 * dist2) >= strut)
          {
-            const real_t kxi = kx / real_c(10.0);
-            const real_t kyi = ky / real_c(10.0);
-            const real_t kzi = kz / real_c(10.0);
-
-            const real_t dist = cos(kxi * x) * sin(kyi * y) + cos(kyi * y) * sin(kzi * z) + cos(kzi * z) * sin(kxi * x);
-
-            if (dist >= strut)
-            {
-               addFlag(flagField->get(x, y, z), boundaryFlag);
-            }
+            addFlag(flagField->get(x, y, z), boundaryFlag);
          }
-         )
+      }
+      )
    }
 }
 } // namespace walberla
diff --git a/apps/showcases/PorousMediaGPU/InitializerFunctions.h b/apps/showcases/PorousMediaGPU/InitializerFunctions.h
index 2dcdaf3ae..c3c445a72 100644
--- a/apps/showcases/PorousMediaGPU/InitializerFunctions.h
+++ b/apps/showcases/PorousMediaGPU/InitializerFunctions.h
@@ -26,6 +26,6 @@
 namespace walberla
 {
 void init_TPMS(const shared_ptr< StructuredBlockStorage >& blocks, BlockDataID flagFieldID,
-               FlagUID boundaryFlagUID, real_t kx, real_t ky, real_t kz);
+               FlagUID boundaryFlagUID, real_t waveNumber, real_t scaling);
 
 } // namespace walberla
diff --git a/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp b/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp
index d0d3c0ee0..a3723d888 100644
--- a/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp
+++ b/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp
@@ -113,24 +113,55 @@ int main(int argc, char** argv)
    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
    auto blocks = blockforest::createUniformBlockGridFromConfig(config);
+
+   auto domainParameters       = config->getOneBlock("DomainSetup");
+   const real_t coarseMeshSize = domainParameters.getParameter< real_t >("dx");
+
    auto parameters          = config->getOneBlock("Parameters");
    const uint_t timesteps   = parameters.getParameter< uint_t >("timesteps", uint_c(50));
    const bool gpuEnabledMPI = parameters.getParameter< bool >("gpuEnabledMPI", false);
 
    const real_t reynoldsNumber    = parameters.getParameter< real_t >("reynoldsNumber");
    const real_t referenceVelocity = parameters.getParameter< real_t >("referenceVelocity");
-   const real_t inflowVelocity    = parameters.getParameter< real_t >("latticeVelocity");
+   const real_t referenceLength   = parameters.getParameter< real_t >("referenceLength");
+   const real_t latticeVelocity   = parameters.getParameter< real_t >("latticeVelocity");
+
+   const real_t waveNumber = parameters.getParameter< real_t >("waveNumber");
+   const real_t scaling    = parameters.getParameter< real_t >("scaling");
 
    const real_t speedOfSound = real_c(real_c(1.0) / std::sqrt( real_c(3.0) ));
-   const real_t machNumber = inflowVelocity / speedOfSound;
+   const real_t machNumber = latticeVelocity / speedOfSound;
    // TODO define RE
-   const real_t viscosity  = real_c((inflowVelocity * 1.0) / reynoldsNumber);
-   const real_t omega      = 1.5; // real_c(real_c(1.0) / (real_c(3.0) * viscosity + real_c(0.5)));
+   const real_t viscosity = real_c((latticeVelocity * referenceLength) / reynoldsNumber);
+   const real_t omega     = real_c(real_c(1.0) / (real_c(3.0) * viscosity + real_c(0.5)));
 
-   IDs ids;
+   const real_t dt = latticeVelocity / referenceVelocity * coarseMeshSize;
 
-   // Creating fields
    const StorageSpecification_T StorageSpec = StorageSpecification_T();
+
+
+   WALBERLA_LOG_INFO_ON_ROOT( "Benchmark run data:"
+                              "\n- simulation parameters:"   << std::setprecision(16) <<
+                              "\n   + collision model:     " << infoMap["collisionOperator"] <<
+                              "\n   + stencil:             " << infoMap["stencil"] <<
+                              "\n   + streaming:           " << infoMap["streamingPattern"] <<
+                              "\n   + compressible:        " << ( StorageSpecification_T::compressible ? "yes" : "no" ) <<
+                              "\n   + resolution:          " << coarseMeshSize << " - on the coarsest grid" <<
+                              "\n- simulation properties:  "
+                              "\n   + kin. viscosity:      " << viscosity * coarseMeshSize * coarseMeshSize / dt << " [m^2/s] (on the coarsest grid)" <<
+                              "\n   + viscosity LB:        " << viscosity  << " [dx*dx/dt] (on the coarsest grid)" <<
+                              "\n   + omega:               " << omega << " (on the coarsest grid)" <<
+                              "\n   + inflow velocity:     " << referenceVelocity << " [m/s]" <<
+                              "\n   + lattice velocity:    " << latticeVelocity << " [dx/dt]" <<
+                              "\n   + Reynolds number:     " << reynoldsNumber <<
+                              "\n   + Mach number:         " << machNumber <<
+                              "\n   + dx (coarsest grid):  " << coarseMeshSize << " [m]" <<
+                              "\n   + dt (coarsest grid):  " << dt << " [s]"
+                              "\n   + #time steps:         " << timesteps << " (on the coarsest grid, " << ( real_c(1.0) / dt ) << " for 1s of real time)" <<
+                              "\n   + simulation time:     " << ( real_c( timesteps ) * dt ) << " [s]" )
+
+   // Creating fields
+   IDs ids;
    ids.pdfField  = lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, uint_c(1), field::fzyx);
 
    auto allocator = make_shared< gpu::HostFieldAllocator<macroFieldType> >(); // use pinned memory allocator for faster CPU-GPU memory transfers
@@ -163,13 +194,12 @@ int main(int argc, char** argv)
    {
       WALBERLA_LOG_INFO_ON_ROOT("Setting boundary conditions")
       geometry::initBoundaryHandling< FlagField_T >(*blocks, ids.flagField, boundariesConfig);
-      const real_t test = 6.0; // 'math::pi * real_c(1.0);
-      init_TPMS(blocks, ids.flagField, wallFlagUID, test, test, test);
+      init_TPMS(blocks, ids.flagField, wallFlagUID, waveNumber, scaling);
       geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, ids.flagField, fluidFlagUID);
    }
    geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, ids.flagField, fluidFlagUID);
-   BoundaryCollection_T boundaryCollection(blocks, ids.flagField, ids.pdfFieldGPU, fluidFlagUID, inflowVelocity);
-   // BoundaryCollection_T boundaryCollection(blocks, ids.flagField, ids.pdfFieldGPU, fluidFlagUID, inflowVelocity, ids.pdfField);
+   // BoundaryCollection_T boundaryCollection(blocks, ids.flagField, ids.pdfFieldGPU, fluidFlagUID, latticeVelocity);
+   BoundaryCollection_T boundaryCollection(blocks, ids.flagField, ids.pdfFieldGPU, fluidFlagUID, latticeVelocity, ids.pdfField);
 
    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    ///                                           COMMUNICATION SCHEME                                             ///
@@ -219,6 +249,17 @@ int main(int argc, char** argv)
       timeLoop.addFuncAfterTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
    }
 
+      // log remaining time
+   auto loggingParameters= config->getOneBlock("Logging");
+   const real_t remainingTimeLoggerFrequency =
+      loggingParameters.getParameter< real_t >("remainingTimeLoggerFrequency", 3.0); // in seconds
+   if (uint_c(remainingTimeLoggerFrequency) > 0)
+   {
+      timeLoop.addFuncAfterTimeStep(
+         timing::RemainingTimeLogger(timeLoop.getNrOfTimeSteps(), remainingTimeLoggerFrequency),
+         "remaining time logger");
+   }
+
    WALBERLA_MPI_BARRIER()
 #if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT)
    WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
diff --git a/apps/showcases/PorousMediaGPU/PorousMediaGPU.prm b/apps/showcases/PorousMediaGPU/PorousMediaGPU.prm
index 13ae1b72a..25191e286 100644
--- a/apps/showcases/PorousMediaGPU/PorousMediaGPU.prm
+++ b/apps/showcases/PorousMediaGPU/PorousMediaGPU.prm
@@ -3,10 +3,14 @@ Parameters
     reynoldsNumber 2500;
 
     referenceVelocity 1.0;
+    referenceLength 1.0;
     latticeVelocity 0.05;
     initialiseWithInletVelocity true;
 
-    timesteps 5001;
+    waveNumber 6.2831853072;
+    scaling 10.0;
+
+    timesteps 50001;
 
     processMemoryLimit 512; // MiB
     innerOuterSplit <1, 1, 1>;
@@ -18,9 +22,10 @@ Parameters
 //! [domainSetup]
 DomainSetup
 {
-    cellsPerBlock < 256, 64, 64 >;
+    cellsPerBlock < 1536, 192, 192 >;
     blocks    < 1, 1, 1 >;
     periodic    < false, true, true >;
+    dx 0.00520833333333;
 }
 //! [domainSetup]
 
@@ -40,7 +45,7 @@ StabilityChecker
 
 VTKWriter
 {
-    vtkWriteFrequency 1000;
+    vtkWriteFrequency 5000;
     vtkGhostLayers 0;
     velocity true;
     density true;
@@ -60,7 +65,7 @@ Logging
     logLevel info;  // info progress detail tracing
     writeSetupForestAndReturn true; // When only one process is used the decomposition is writen and the program terminates
     readSetupFromFile false;
-    remainingTimeLoggerFrequency 60; // in seconds
+    remainingTimeLoggerFrequency 20; // in seconds
 }
 
 Evaluation
diff --git a/apps/showcases/PorousMediaGPU/PorousMediaGPU.py b/apps/showcases/PorousMediaGPU/PorousMediaGPU.py
index f01f9f538..6e1333dda 100644
--- a/apps/showcases/PorousMediaGPU/PorousMediaGPU.py
+++ b/apps/showcases/PorousMediaGPU/PorousMediaGPU.py
@@ -23,7 +23,7 @@ std::map<std::string, std::string> infoMap{{{{"stencil", "{stencil}"}},
 omega = sp.symbols("omega")
 inlet_velocity = sp.symbols("u_x")
 max_threads = 256
-
+ 
 with CodeGeneration() as ctx:
     target = Target.GPU if ctx.gpu else Target.CPU
     sweep_params = {'block_size': (128, 1, 1)} if ctx.gpu else {}
@@ -34,7 +34,7 @@ with CodeGeneration() as ctx:
     dtype = 'float64' if ctx.double_accuracy else 'float32'
     pdf_dtype = 'float64'
 
-    stencil = LBStencil(Stencil.D3Q19)
+    stencil = LBStencil(Stencil.D3Q27)
     q = stencil.Q
     dim = stencil.D
 
@@ -45,8 +45,8 @@ with CodeGeneration() as ctx:
 
     macroscopic_fields = {'density': density_field, 'velocity': velocity_field}
 
-    # method_enum = Method.CUMULANT
-    method_enum = Method.SRT
+    method_enum = Method.CUMULANT
+    # method_enum = Method.SRT
     lbm_config = LBMConfig(
         method=method_enum,
         stencil=stencil,
@@ -74,15 +74,15 @@ with CodeGeneration() as ctx:
                                  boundary_object=UBB((inlet_velocity, 0.0, 0.0), density=1.0, data_type=dtype),
                                  field_data_type=pdf_dtype)
 
-    # outflow_boundary = ExtrapolationOutflow(stencil[4], lb_method, data_type=pdf_dtype)
-    # outflow = lbm_boundary_generator(class_name='Outflow', flag_uid='Outflow',
-    #                                  boundary_object=outflow_boundary,
-    #                                  field_data_type=pdf_dtype)
-
+    outflow_boundary = ExtrapolationOutflow(stencil[4], lb_method, data_type=pdf_dtype)
     outflow = lbm_boundary_generator(class_name='Outflow', flag_uid='Outflow',
-                                     boundary_object=FixedDensity(1.0),
+                                     boundary_object=outflow_boundary,
                                      field_data_type=pdf_dtype)
 
+    # outflow = lbm_boundary_generator(class_name='Outflow', flag_uid='Outflow',
+    #                                  boundary_object=FixedDensity(1.0),
+    #                                  field_data_type=pdf_dtype)
+
     generate_lbm_package(ctx, name="PorousMediaGPU", collision_rule=collision_rule,
                          lbm_config=lbm_config, lbm_optimisation=lbm_opt,
                          nonuniform=False, boundaries=[no_slip, ubb, outflow],
diff --git a/python/lbmpy_walberla/packing_kernels.py b/python/lbmpy_walberla/packing_kernels.py
index 862a6cdaa..af7be15cc 100644
--- a/python/lbmpy_walberla/packing_kernels.py
+++ b/python/lbmpy_walberla/packing_kernels.py
@@ -103,9 +103,9 @@ class PackingKernelsCodegen:
         self.accessors = [get_accessor(streaming_pattern, t) for t in get_timesteps(streaming_pattern)]
         self.mask_field = fields(f'mask : uint32 [{self.dim}D]', layout=src_field.layout)
 
-        self.block_wise = True
-        if not self.inplace or not self.config.target == Target.GPU:
-            self.block_wise = False
+        # self.block_wise = True
+        # if not self.inplace or not self.config.target == Target.GPU:
+        self.block_wise = False
 
         self.index = TypedSymbol("index", dtype=BasicType(np.int64))
         self.index_shape = TypedSymbol("_size_0", dtype=BasicType(np.int64))
-- 
GitLab