From d26560e2bfd99fb9e47437b2a171cd6f8c5be462 Mon Sep 17 00:00:00 2001
From: markus holzer <markus.holzer@fau.de>
Date: Tue, 3 Aug 2021 10:40:28 +0200
Subject: [PATCH] MLUP calculation based on fluid cells only

---
 apps/benchmarks/FlagFieldGPU/FlagFieldGPU.cpp | 38 +++++++++----
 .../FlagFieldGPU/FlagFieldGPUCodeGen.py       | 56 ++++++++++---------
 2 files changed, 57 insertions(+), 37 deletions(-)

diff --git a/apps/benchmarks/FlagFieldGPU/FlagFieldGPU.cpp b/apps/benchmarks/FlagFieldGPU/FlagFieldGPU.cpp
index 3f7e9ceba..e5e2dec54 100644
--- a/apps/benchmarks/FlagFieldGPU/FlagFieldGPU.cpp
+++ b/apps/benchmarks/FlagFieldGPU/FlagFieldGPU.cpp
@@ -68,7 +68,6 @@ int main(int argc, char** argv)
    auto blocks = blockforest::createUniformBlockGridFromConfig(config);
 
    // read parameters
-   Vector3< uint_t > cellsPerBlock = config->getBlock("DomainSetup").getParameter< Vector3< uint_t > >("cellsPerBlock");
    auto parameters = config->getOneBlock("Parameters");
 
    const uint_t timesteps = parameters.getParameter< uint_t >("timesteps", uint_c(10));
@@ -77,8 +76,8 @@ int main(int argc, char** argv)
    const real_t reynolds_number = parameters.getParameter< real_t >("reynolds_number", real_t(1000));
    const uint_t diameter_sphere = parameters.getParameter< uint_t >("diameter_sphere", uint_t(5));
 
-   const double remainingTimeLoggerFrequency =
-      parameters.getParameter< double >("remainingTimeLoggerFrequency", 3.0); // in seconds
+   const real_t remainingTimeLoggerFrequency =
+      parameters.getParameter< real_t >("remainingTimeLoggerFrequency", 3.0); // in seconds
 
    // create fields
    BlockDataID pdfFieldID = blocks->addStructuredBlockData< PdfField_T >(pdfFieldAdder, "PDFs");
@@ -91,7 +90,7 @@ int main(int argc, char** argv)
    BlockDataID densityFieldIDGPU =
      cuda::addGPUFieldToStorage< ScalarField_T >(blocks, densityFieldID, "density on GPU", true);
 
-   BlockDataID flagFieldId = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field");
+   BlockDataID flagFieldId     = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field");
    BlockDataID flagFieldId_gpu = cuda::addGPUFieldToStorage< FlagField_T >(blocks, flagFieldId, "flag on GPU", true);
 
    // initialise all PDFs
@@ -128,8 +127,7 @@ int main(int argc, char** argv)
    SweepTimeloop timeloop(blocks->getBlockStorage(), timesteps);
 
    // add LBM sweep and communication to time loop
-   timeloop.add() << BeforeFunction(comm, "communication")
-                  << Sweep(outflow, "outflow boundary");
+   timeloop.add() << BeforeFunction(comm, "communication") << Sweep(outflow, "outflow boundary");
    timeloop.add() << Sweep(ubb, "ubb boundary");
    timeloop.add() << Sweep(noSlip, "noSlip boundary");
    timeloop.add() << Sweep(lbSweep, "LB update rule");
@@ -157,7 +155,7 @@ int main(int argc, char** argv)
 
       auto velWriter     = make_shared< field::VTKWriter< VelocityField_T > >(velFieldID, "velocity");
       auto densityWriter = make_shared< field::VTKWriter< ScalarField_T > >(densityFieldID, "density");
-      auto flagWriter = make_shared< field::VTKWriter< FlagField_T > >(flagFieldId, "flagField");
+      auto flagWriter    = make_shared< field::VTKWriter< FlagField_T > >(flagFieldId, "flagField");
 
      vtkOutput->addCellDataWriter(velWriter);
      vtkOutput->addCellDataWriter(densityWriter);
@@ -178,9 +176,29 @@ int main(int argc, char** argv)
    timeloop.run();
    simTimer.end();
    WALBERLA_LOG_INFO_ON_ROOT("Simulation finished")
-   auto time            = simTimer.last();
-   auto nrOfCells       = real_c(cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2]);
-   auto mlupsPerProcess = nrOfCells * real_c(timesteps) / time * 1e-6;
+   auto time = simTimer.last();
+
+   // get the number of fluid cells on the block
+   uint64_t nrOfFluidCells    = 0;
+   uint64_t nrOfBoundaryCells = 0;
+   for (auto& block : *blocks)
+   {
+      auto* flagField = block.getData< FlagField_T >(flagFieldId);
+      auto domainFlag = flagField->getFlag(fluidFlagUID);
+
+      for (auto it = flagField->begin(); it != flagField->end(); ++it)
+      {
+         if (isFlagSet(it, domainFlag)) { nrOfFluidCells += 1; }
+         if (!isFlagSet(it, domainFlag)) { nrOfBoundaryCells += 1; }
+      }
+   }
+
+   auto mlupsPerProcess = real_c(nrOfFluidCells) * real_c(timesteps) / time * 1e-6;
+
+   // TODO: when going to multiple GPUs the performance should be measured on each GPU. At the moment only performance
+   // on root is considered.
+   WALBERLA_LOG_RESULT_ON_ROOT("Fluid Cells on the block " << nrOfFluidCells)
+   WALBERLA_LOG_RESULT_ON_ROOT("Boundary Cells on the block " << nrOfBoundaryCells)
    WALBERLA_LOG_RESULT_ON_ROOT("MLUPS per process " << mlupsPerProcess)
    WALBERLA_LOG_RESULT_ON_ROOT("Time per time step " << time / real_c(timesteps))
 
diff --git a/apps/benchmarks/FlagFieldGPU/FlagFieldGPUCodeGen.py b/apps/benchmarks/FlagFieldGPU/FlagFieldGPUCodeGen.py
index f20a2f81a..937ec1944 100644
--- a/apps/benchmarks/FlagFieldGPU/FlagFieldGPUCodeGen.py
+++ b/apps/benchmarks/FlagFieldGPU/FlagFieldGPUCodeGen.py
@@ -13,44 +13,46 @@ from lbmpy_walberla import generate_boundary, generate_lb_pack_info
 
 import sympy as sp
 
-stencil = get_stencil("D3Q19")
-q = len(stencil)
-dim = len(stencil[0])
+with CodeGeneration() as ctx:
+    dtype = 'float64' if ctx.double_accuracy else 'float32'
 
-pdfs, pdfs_tmp = fields(f"pdfs({q}), pdfs_tmp({q}) : double[{dim}D]", layout='fzyx')
-velocity_field, density_field = fields(f"velocity({dim}), density(1) : double[{dim}D]", layout='fzyx')
+    stencil = get_stencil("D3Q19")
+    q = len(stencil)
+    dim = len(stencil[0])
 
-flag = fields(f"flag_field: uint8[{dim}D]", layout='fzyx')
+    pdfs, pdfs_tmp = fields(f"pdfs({q}), pdfs_tmp({q}) : {dtype}[{dim}D]", layout='fzyx')
+    velocity_field, density_field = fields(f"velocity({dim}), density(1) : {dtype}[{dim}D]", layout='fzyx')
 
-omega = sp.Symbol("omega")
-u_max = sp.Symbol("u_max")
+    flag = fields(f"flag_field: uint8[{dim}D]", layout='fzyx')
 
-output = {
-    'density': density_field,
-    'velocity': velocity_field
-}
+    omega = sp.Symbol("omega")
+    u_max = sp.Symbol("u_max")
 
-method = create_lb_method(stencil=stencil, method='srt', relaxation_rate=omega, compressible=True)
+    output = {
+        'density': density_field,
+        'velocity': velocity_field
+    }
 
-update_rule = create_lb_update_rule(lb_method=method,
-                                    output=output,
-                                    optimization={"symbolic_field": pdfs,
-                                                  "symbolic_temporary_field": pdfs_tmp},
-                                    kernel_type='stream_pull_collide')
+    method = create_lb_method(stencil=stencil, method='srt', relaxation_rate=omega, compressible=True)
 
-update_rule = [Conditional(sp.Eq(flag.center(), 8), Block(update_rule))]
+    update_rule = create_lb_update_rule(lb_method=method,
+                                        output=output,
+                                        optimization={"symbolic_field": pdfs,
+                                                      "symbolic_temporary_field": pdfs_tmp,
+                                                      "double_precision": True if ctx.double_accuracy else False},
+                                        kernel_type='stream_pull_collide')
+    update_rule = [Conditional(sp.Eq(flag.center(), 8), Block(update_rule))]
 
 
-# getter & setter
-setter_assignments = macroscopic_values_setter(method, velocity=velocity_field.center_vector,
-                                               pdfs=pdfs, density=1.0)
+    # getter & setter
+    setter_assignments = macroscopic_values_setter(method, velocity=velocity_field.center_vector,
+                                                   pdfs=pdfs, density=1.0)
 
-stencil_typedefs = {'Stencil_T': stencil}
-field_typedefs = {'PdfField_T': pdfs,
-                  'VelocityField_T': velocity_field,
-                  'ScalarField_T': density_field}
+    stencil_typedefs = {'Stencil_T': stencil}
+    field_typedefs = {'PdfField_T': pdfs,
+                      'VelocityField_T': velocity_field,
+                      'ScalarField_T': density_field}
 
-with CodeGeneration() as ctx:
     target = 'gpu'
 
     # sweeps
-- 
GitLab
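
Two reviewer notes on the C++ hunk. The reported MLUPS figure now counts fluid cells only, and it is a per-process value: for example, 10^6 fluid cells advanced by 100 time steps in 0.5 s give 10^6 * 100 / 0.5 * 1e-6 = 200 MLUPS. Regarding the TODO about multiple GPUs, the per-process count and runtime could be reduced across all ranks before logging. A minimal sketch, assuming waLBerla's mpi::allReduceInplace and mpi::reduce from "core/mpi/Reduce.h" and reusing the variable names from the patch (illustrative, not part of the commit):

   #include "core/mpi/Reduce.h"

   // Hypothetical follow-up: sum the per-process fluid-cell counts over all ranks.
   uint64_t globalFluidCells = nrOfFluidCells;
   mpi::allReduceInplace(globalFluidCells, mpi::SUM);

   // The slowest rank dominates a synchronised run, so reduce the runtime with MAX.
   auto maxTime = mpi::reduce(time, mpi::MAX);

   WALBERLA_LOG_RESULT_ON_ROOT("Global MLUPS " << real_c(globalFluidCells) * real_c(timesteps) / maxTime * 1e-6)

Since the reduced values are only meaningful on the root rank, logging them with WALBERLA_LOG_RESULT_ON_ROOT keeps the output consistent.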
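On the code-generation side, wrapping the update rule in Conditional(sp.Eq(flag.center(), 8), Block(update_rule)) makes the generated GPU kernel update a cell only when its flag value equals 8, i.e. when exactly bit 3 (the fluid flag in this setup) is set and no other flag. That hard-coded constant has to stay in sync with the flag registration on the C++ side. One way to make the coupling explicit is to pin the fluid flag to a fixed bit via FlagField's registerFlag overload that takes a bit number; a hypothetical sketch, not part of the patch:

   // Pin the fluid flag to bit 3 on every block, so its mask is 1 << 3 = 8
   // and matches the constant compiled into the generated kernel.
   for (auto& block : *blocks)
   {
      auto* flagField = block.getData< FlagField_T >(flagFieldId);
      if (!flagField->flagExists(fluidFlagUID)) { flagField->registerFlag(fluidFlagUID, uint_t(3)); }
   }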