From d26560e2bfd99fb9e47437b2a171cd6f8c5be462 Mon Sep 17 00:00:00 2001
From: markus holzer <markus.holzer@fau.de>
Date: Tue, 3 Aug 2021 10:40:28 +0200
Subject: [PATCH] MLUP calculation based on fluid cells only

---
 apps/benchmarks/FlagFieldGPU/FlagFieldGPU.cpp | 38 +++++++++----
 .../FlagFieldGPU/FlagFieldGPUCodeGen.py       | 56 ++++++++++---------
 2 files changed, 57 insertions(+), 37 deletions(-)

diff --git a/apps/benchmarks/FlagFieldGPU/FlagFieldGPU.cpp b/apps/benchmarks/FlagFieldGPU/FlagFieldGPU.cpp
index 3f7e9ceba..e5e2dec54 100644
--- a/apps/benchmarks/FlagFieldGPU/FlagFieldGPU.cpp
+++ b/apps/benchmarks/FlagFieldGPU/FlagFieldGPU.cpp
@@ -68,7 +68,6 @@ int main(int argc, char** argv)
    auto blocks = blockforest::createUniformBlockGridFromConfig(config);
 
    // read parameters
-   Vector3< uint_t > cellsPerBlock = config->getBlock("DomainSetup").getParameter< Vector3< uint_t > >("cellsPerBlock");
    auto parameters = config->getOneBlock("Parameters");
 
    const uint_t timesteps = parameters.getParameter< uint_t >("timesteps", uint_c(10));
@@ -77,8 +76,8 @@ int main(int argc, char** argv)
    const real_t reynolds_number = parameters.getParameter< real_t >("reynolds_number", real_t(1000));
    const uint_t diameter_sphere = parameters.getParameter< uint_t >("diameter_sphere", uint_t(5));
 
-   const double remainingTimeLoggerFrequency =
-      parameters.getParameter< double >("remainingTimeLoggerFrequency", 3.0); // in seconds
+   const real_t remainingTimeLoggerFrequency =
+      parameters.getParameter< real_t >("remainingTimeLoggerFrequency", 3.0); // in seconds
 
    // create fields
    BlockDataID pdfFieldID = blocks->addStructuredBlockData< PdfField_T >(pdfFieldAdder, "PDFs");
@@ -91,7 +90,7 @@ int main(int argc, char** argv)
    BlockDataID densityFieldIDGPU =
      cuda::addGPUFieldToStorage< ScalarField_T >(blocks, densityFieldID, "density on GPU", true);
 
-   BlockDataID flagFieldId = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field");
+   BlockDataID flagFieldId     = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field");
    BlockDataID flagFieldId_gpu = cuda::addGPUFieldToStorage< FlagField_T >(blocks, flagFieldId, "flag on GPU", true);
 
    // initialise all PDFs
@@ -128,8 +127,7 @@ int main(int argc, char** argv)
    SweepTimeloop timeloop(blocks->getBlockStorage(), timesteps);
 
    // add LBM sweep and communication to time loop
-   timeloop.add() << BeforeFunction(comm, "communication")
-                  << Sweep(outflow, "outflow boundary");
+   timeloop.add() << BeforeFunction(comm, "communication") << Sweep(outflow, "outflow boundary");
    timeloop.add() << Sweep(ubb, "ubb boundary");
    timeloop.add() << Sweep(noSlip, "noSlip boundary");
    timeloop.add() << Sweep(lbSweep, "LB update rule");
@@ -157,7 +155,7 @@ int main(int argc, char** argv)
 
       auto velWriter     = make_shared< field::VTKWriter< VelocityField_T > >(velFieldID, "velocity");
       auto densityWriter = make_shared< field::VTKWriter< ScalarField_T > >(densityFieldID, "density");
-      auto flagWriter = make_shared< field::VTKWriter< FlagField_T > >(flagFieldId, "flagField");
+      auto flagWriter    = make_shared< field::VTKWriter< FlagField_T > >(flagFieldId, "flagField");
 
      vtkOutput->addCellDataWriter(velWriter);
      vtkOutput->addCellDataWriter(densityWriter);
@@ -178,9 +176,29 @@ int main(int argc, char** argv)
    timeloop.run();
    simTimer.end();
    WALBERLA_LOG_INFO_ON_ROOT("Simulation finished")
-   auto time            = simTimer.last();
-   auto nrOfCells       = real_c(cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2]);
-   auto mlupsPerProcess = nrOfCells * real_c(timesteps) / time * 1e-6;
+   auto time = simTimer.last();
+
+   // get the number of fluid cells on the block
+   uint64_t nrOfFluidCells    = 0;
+   uint64_t nrOfBoundaryCells = 0;
+   for (auto& block : *blocks)
+   {
+      auto* flagField = block.getData< FlagField_T >(flagFieldId);
+      auto domainFlag = flagField->getFlag(fluidFlagUID);
+
+      for (auto it = flagField->begin(); it != flagField->end(); ++it)
+      {
+         if (isFlagSet(it, domainFlag)) { nrOfFluidCells += 1; }
+         if (!isFlagSet(it, domainFlag)) { nrOfBoundaryCells += 1; }
+      }
+   }
+
+   auto mlupsPerProcess = real_c(nrOfFluidCells) * real_c(timesteps) / time * 1e-6;
+
+   // TODO: when going to multiple GPUs the performance should be measured on each GPU. At the moment only performance
+   // on root is considered.
+   WALBERLA_LOG_RESULT_ON_ROOT("Fluid Cells on the block " << nrOfFluidCells)
+   WALBERLA_LOG_RESULT_ON_ROOT("Boundary Cells on the block " << nrOfBoundaryCells)
    WALBERLA_LOG_RESULT_ON_ROOT("MLUPS per process " << mlupsPerProcess)
    WALBERLA_LOG_RESULT_ON_ROOT("Time per time step " << time / real_c(timesteps))
 
diff --git a/apps/benchmarks/FlagFieldGPU/FlagFieldGPUCodeGen.py b/apps/benchmarks/FlagFieldGPU/FlagFieldGPUCodeGen.py
index f20a2f81a..937ec1944 100644
--- a/apps/benchmarks/FlagFieldGPU/FlagFieldGPUCodeGen.py
+++ b/apps/benchmarks/FlagFieldGPU/FlagFieldGPUCodeGen.py
@@ -13,44 +13,46 @@ from lbmpy_walberla import generate_boundary, generate_lb_pack_info
 
 import sympy as sp
 
-stencil = get_stencil("D3Q19")
-q = len(stencil)
-dim = len(stencil[0])
+with CodeGeneration() as ctx:
+    dtype = 'float64' if ctx.double_accuracy else 'float32'
 
-pdfs, pdfs_tmp = fields(f"pdfs({q}), pdfs_tmp({q}) : double[{dim}D]", layout='fzyx')
-velocity_field, density_field = fields(f"velocity({dim}), density(1) : double[{dim}D]", layout='fzyx')
+    stencil = get_stencil("D3Q19")
+    q = len(stencil)
+    dim = len(stencil[0])
 
-flag = fields(f"flag_field: uint8[{dim}D]", layout='fzyx')
+    pdfs, pdfs_tmp = fields(f"pdfs({q}), pdfs_tmp({q}) : {dtype}[{dim}D]", layout='fzyx')
+    velocity_field, density_field = fields(f"velocity({dim}), density(1) : {dtype}[{dim}D]", layout='fzyx')
 
-omega = sp.Symbol("omega")
-u_max = sp.Symbol("u_max")
+    flag = fields(f"flag_field: uint8[{dim}D]", layout='fzyx')
 
-output = {
-    'density': density_field,
-    'velocity': velocity_field
-}
+    omega = sp.Symbol("omega")
+    u_max = sp.Symbol("u_max")
 
-method = create_lb_method(stencil=stencil, method='srt', relaxation_rate=omega, compressible=True)
+    output = {
+        'density': density_field,
+        'velocity': velocity_field
+    }
 
-update_rule = create_lb_update_rule(lb_method=method,
-                                    output=output,
-                                    optimization={"symbolic_field": pdfs,
-                                                  "symbolic_temporary_field": pdfs_tmp},
-                                    kernel_type='stream_pull_collide')
+    method = create_lb_method(stencil=stencil, method='srt', relaxation_rate=omega, compressible=True)
 
-update_rule = [Conditional(sp.Eq(flag.center(), 8), Block(update_rule))]
+    update_rule = create_lb_update_rule(lb_method=method,
+                                        output=output,
+                                        optimization={"symbolic_field": pdfs,
+                                                      "symbolic_temporary_field": pdfs_tmp,
+                                                      "double_precision": True if ctx.double_accuracy else False},
+                                        kernel_type='stream_pull_collide')
+    update_rule = [Conditional(sp.Eq(flag.center(), 8), Block(update_rule))]
 
 
-# getter & setter
-setter_assignments = macroscopic_values_setter(method, velocity=velocity_field.center_vector,
-                                               pdfs=pdfs, density=1.0)
+    # getter & setter
+    setter_assignments = macroscopic_values_setter(method, velocity=velocity_field.center_vector,
+                                                   pdfs=pdfs, density=1.0)
 
-stencil_typedefs = {'Stencil_T': stencil}
-field_typedefs = {'PdfField_T': pdfs,
-                  'VelocityField_T': velocity_field,
-                  'ScalarField_T': density_field}
+    stencil_typedefs = {'Stencil_T': stencil}
+    field_typedefs = {'PdfField_T': pdfs,
+                      'VelocityField_T': velocity_field,
+                      'ScalarField_T': density_field}
 
-with CodeGeneration() as ctx:
     target = 'gpu'
 
     # sweeps
-- 
GitLab
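
Two reviewer notes on the C++ hunk. The reported MLUPS figure now counts fluid cells only, and it is a per-process value: for example, 10^6 fluid cells advanced by 100 time steps in 0.5 s give 10^6 * 100 / 0.5 * 1e-6 = 200 MLUPS. Regarding the TODO about multiple GPUs, the per-process count and runtime could be reduced across all ranks before logging. A minimal sketch, assuming waLBerla's mpi::allReduceInplace and mpi::reduce from "core/mpi/Reduce.h" and reusing the variable names from the patch (illustrative, not part of the commit):

   #include "core/mpi/Reduce.h"

   // Hypothetical follow-up: sum the per-process fluid-cell counts over all ranks.
   uint64_t globalFluidCells = nrOfFluidCells;
   mpi::allReduceInplace(globalFluidCells, mpi::SUM);

   // The slowest rank dominates a synchronised run, so reduce the runtime with MAX.
   auto maxTime = mpi::reduce(time, mpi::MAX);

   WALBERLA_LOG_RESULT_ON_ROOT("Global MLUPS " << real_c(globalFluidCells) * real_c(timesteps) / maxTime * 1e-6)

Since the reduced values are only meaningful on the root rank, logging them with WALBERLA_LOG_RESULT_ON_ROOT keeps the output consistent.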
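On the code-generation side, wrapping the update rule in Conditional(sp.Eq(flag.center(), 8), Block(update_rule)) makes the generated GPU kernel update a cell only when its flag value equals 8, i.e. when exactly bit 3 (the fluid flag in this setup) is set and no other flag. That hard-coded constant has to stay in sync with the flag registration on the C++ side. One way to make the coupling explicit is to pin the fluid flag to a fixed bit via FlagField's registerFlag overload that takes a bit number; a hypothetical sketch, not part of the patch:

   // Pin the fluid flag to bit 3 on every block, so its mask is 1 << 3 = 8
   // and matches the constant compiled into the generated kernel.
   for (auto& block : *blocks)
   {
      auto* flagField = block.getData< FlagField_T >(flagFieldId);
      if (!flagField->flagExists(fluidFlagUID)) { flagField->registerFlag(fluidFlagUID, uint_t(3)); }
   }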