From 6138d5b86cb9044cb3898994d443a8c0c85b5069 Mon Sep 17 00:00:00 2001 From: Markus Holzer <mholzer@ldmpe865h.onecert.fr> Date: Fri, 21 Mar 2025 16:05:17 +0100 Subject: [PATCH] [skip ci] Update --- CMakeLists.txt | 1 + apps/showcases/PorousMediaGPU/CMakeLists.txt | 5 + .../PorousMediaGPU/DragEvaluation.cpp | 170 ++++++++++-------- .../showcases/PorousMediaGPU/DragEvaluation.h | 3 + .../PorousMediaGPU/InitializerFunctions.cpp | 35 ++++ apps/showcases/PorousMediaGPU/JinCase.prm | 43 ++--- .../PorousMediaGPU/PorousMediaGPU.cpp | 166 ++++++++++------- apps/showcases/PorousMediaGPU/Probe.cpp | 97 +++++----- apps/showcases/PorousMediaGPU/Probe.h | 5 +- apps/showcases/PorousMediaGPU/Setup.h | 21 ++- apps/showcases/PorousMediaGPU/TPMS.prm | 16 +- apps/showcases/PorousMediaGPU/roughWall.prm | 1 + apps/showcases/PorousMediaGPU/utilGPU.cu | 12 +- apps/showcases/PorousMediaGPU/utilGPU.h | 45 ++++- 14 files changed, 388 insertions(+), 232 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7b21f6268..ec69c641d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1069,6 +1069,7 @@ if ( WALBERLA_BUILD_WITH_CUDA ) endif() enable_language(CUDA) + find_package(CUDAToolkit REQUIRED) #include directories and cudart lib is needed for cpp files that use cuda headers/libs include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) diff --git a/apps/showcases/PorousMediaGPU/CMakeLists.txt b/apps/showcases/PorousMediaGPU/CMakeLists.txt index 9b7a4e28a..b97dc1247 100644 --- a/apps/showcases/PorousMediaGPU/CMakeLists.txt +++ b/apps/showcases/PorousMediaGPU/CMakeLists.txt @@ -23,6 +23,11 @@ if (WALBERLA_BUILD_WITH_CUDA OR WALBERLA_BUILD_WITH_HIP) waLBerla_add_executable ( NAME PorousMediaGPU FILES PorousMediaGPU.cpp GridGeneration.h Setup.h Types.h DragEvaluation.cpp InitializerFunctions.cpp TPMS.cpp Inflow.cpp Probe.cpp util.cpp utilGPU.cu DEPENDS blockforest boundary core field gpu io lbm_generated postprocessing stencil timeloop vtk PorousMediaGPUCodeGen ) + +if(TARGET CUDA::curand) + target_link_libraries(PorousMediaGPU CUDA::curand) +endif() + else() waLBerla_add_executable ( NAME PorousMediaGPU FILES PorousMediaGPU.cpp GridGeneration.h Setup.h Types.h DragEvaluation.cpp InitializerFunctions.cpp TPMS.cpp Inflow.cpp Probe.cpp util.cpp diff --git a/apps/showcases/PorousMediaGPU/DragEvaluation.cpp b/apps/showcases/PorousMediaGPU/DragEvaluation.cpp index 0617986e5..8d84b7906 100644 --- a/apps/showcases/PorousMediaGPU/DragEvaluation.cpp +++ b/apps/showcases/PorousMediaGPU/DragEvaluation.cpp @@ -61,29 +61,28 @@ DragEvaluation::DragEvaluation(std::shared_ptr< StructuredBlockForest >& blocks, { if( filesystem::exists( hdfFilePath ) ) std::remove( hdfFilePath.string().c_str() ); - } - extendSize[0] = 0; - selectStart[0] = 0; - H5::FileAccPropList fileAccProps; -#ifdef WALBERLA_BUILD_WITH_MPI - // Enable MPI-IO - H5Pset_fapl_mpio(fileAccProps.getId(), mpi::MPIManager::instance()->comm(), MPI_INFO_NULL); -#endif - h5file_ = std::make_unique<H5::H5File>(hdfFilePath.c_str(), H5F_ACC_CREAT | H5F_ACC_RDWR, H5::FileCreatPropList::DEFAULT, fileAccProps); - H5::Group dataGroup = h5file_->createGroup("Data"); + extendSize[0] = 0; + selectStart[0] = 0; + H5::FileAccPropList fileAccProps; + h5file_ = std::make_unique<H5::H5File>(hdfFilePath.c_str(), H5F_ACC_CREAT | H5F_ACC_RDWR, H5::FileCreatPropList::DEFAULT, fileAccProps); + H5::Group dataGroup = h5file_->createGroup("Data"); - const hsize_t dsetSize[1]{ 1 }; - const hsize_t maxSize[1]{ H5S_UNLIMITED }; - const hsize_t chunkSize[1]{writeCounter_}; + const hsize_t dsetSize[1]{ 1 }; + const hsize_t maxSize[1]{ H5S_UNLIMITED }; + const hsize_t chunkSize[1]{writeCounter_}; - H5::DataSpace arrayDataSpace(1, dsetSize, maxSize); - H5::DSetCreatPropList props; - props.setChunk(1, chunkSize); + H5::DataSpace arrayDataSpace(1, dsetSize, maxSize); + H5::DSetCreatPropList props; + props.setChunk(1, chunkSize); + + for (auto quantitie = quantities_.begin(); quantitie != quantities_.end(); quantitie++){ + dataGroup.createDataSet(quantitie->first, io::h5Float(), arrayDataSpace, props); + } - for (auto quantitie = quantities_.begin(); quantitie != quantities_.end(); quantitie++){ - dataGroup.createDataSet(quantitie->first, io::h5Float(), arrayDataSpace, props); } + WALBERLA_MPI_BARRIER() + dragResults_.resize(quantities_.size() * writeCounter_); auto domain = blocks->getDomain(); @@ -142,7 +141,10 @@ void DragEvaluation::forceCalculation(const uint_t level) force_ += Vector3<double>(boundaryCollection_.ObstacleObject->getForce(b)); } - mpi::reduceInplace(force_, mpi::SUM); + // Gather force on root rank and distrute result to all other processes + mpi::reduceInplace(force_, mpi::SUM, 0, MPIManager::instance()->comm()); + mpi::broadcastObject(force_, 0, MPIManager::instance()->comm()); + WALBERLA_ROOT_SECTION(){ const double t = double_c(totalExecutionCounter_ - 1) * dt_; const double meanU2 = double_c(meanVelocity_) * double_c(meanVelocity_); @@ -167,7 +169,9 @@ void DragEvaluation::forceCalculation(const uint_t level) ++meanQuantitiesCounter_; ++executionCounter_; if(executionCounter_ >= writeCounter_ || totalExecutionCounter_ == timesteps_){ - writeResults(); + WALBERLA_ROOT_SECTION(){ + writeResults(); + } } } } @@ -209,49 +213,49 @@ Plotter::Plotter(std::shared_ptr< StructuredBlockForest >& blocks, const Setup& { if( filesystem::exists( hdfFilePath ) ) std::remove( hdfFilePath.string().c_str() ); - } - H5::FileAccPropList fileAccProps; -#ifdef WALBERLA_BUILD_WITH_MPI - // Enable MPI-IO - H5Pset_fapl_mpio(fileAccProps.getId(), mpi::MPIManager::instance()->comm(), MPI_INFO_NULL); -#endif - h5file_ = std::make_unique<H5::H5File>(hdfFilePath.c_str(), H5F_ACC_CREAT | H5F_ACC_RDWR, H5::FileCreatPropList::DEFAULT, fileAccProps); - H5::Group dataGroup = h5file_->createGroup("Data"); + H5::FileAccPropList fileAccProps; + h5file_ = std::make_unique<H5::H5File>(hdfFilePath.c_str(), H5F_ACC_CREAT | H5F_ACC_RDWR, H5::FileCreatPropList::DEFAULT, fileAccProps); + H5::Group dataGroup = h5file_->createGroup("Data"); + } } void Plotter::writeResult(const real_t frictionVelocity, const real_t welfordCounter){ - auto finalDomain = blocks_->getDomain(); - - std::map<real_t, uint_t> centerToIntex; - uint_t idx = 0; - - Vector3<real_t> p = finalDomain.min(); - IBlock* block = blocks_->getBlock(p); - while(block){ - const uint_t ny = blocks_->getNumberOfYCells(*block); + std::set<real_t> localYCoordinates; + for (auto& block : *blocks_){ + const uint_t ny = blocks_->getNumberOfYCells(block); for(uint_t y = 0; y < ny; ++y){ - const Cell localCell(cell_idx_c(0), cell_idx_c(y), cell_idx_c(0)); - Vector3<real_t> globalCellCenter; - blocks_->getBlockLocalCellCenter(*block, localCell, globalCellCenter); - centerToIntex[globalCellCenter[1]] = idx; - idx++; - } - p[1] = block->getAABB().yMax() + setup_.dxF; - block = blocks_->getBlock(p); - if(block == nullptr){ - WALBERLA_CHECK_GREATER(p[1], finalDomain.yMax(), "This method does is not yet implemented for MPI parallel simulations") + const Cell localCell(cell_idx_c(0), cell_idx_c(y), cell_idx_c(0)); + Vector3<real_t> globalCellCenter; + blocks_->getBlockLocalCellCenter(block, localCell, globalCellCenter); + localYCoordinates.insert(globalCellCenter[1]); } } - const real_t viscosity = setup_.viscosity; + std::vector<real_t> allYCoordinates(localYCoordinates.begin(), localYCoordinates.end()); + + // gather all coordinates on the root rank and broadcast to all other ranks + allYCoordinates = mpi::gatherv(allYCoordinates, 0, MPIManager::instance()->comm()); + mpi::broadcastObject(allYCoordinates, 0, MPIManager::instance()->comm()); + + std::sort( allYCoordinates.begin(), allYCoordinates.end() ); + allYCoordinates.erase( std::unique( allYCoordinates.begin(), allYCoordinates.end() ), allYCoordinates.end() ); + + std::map<real_t, uint_t> centerToIntex; + for(uint_t idx = 0; idx < allYCoordinates.size(); ++idx){ + centerToIntex[allYCoordinates[idx]] = idx; + } + + const real_t viscosity = setup_.kinViscosity; const real_t channelHalfHeight = setup_.referenceLength; - addAttribute(viscosity, "viscosity"); - addAttribute(channelHalfHeight, "channelHalfHeight"); - addAttribute(frictionVelocity, "frictionVelocity"); - addAttribute(welfordCounter, "welfordCounter"); + WALBERLA_ROOT_SECTION(){ + addAttribute(viscosity, "viscosity"); + addAttribute(channelHalfHeight, "channelHalfHeight"); + addAttribute(frictionVelocity, "frictionVelocity"); + addAttribute(welfordCounter, "welfordCounter"); + } std::vector<real_t> yCellCenter(centerToIntex.size(), real_c(0.0)); std::vector<real_t> yRel(centerToIntex.size(), real_c(0.0)); @@ -285,7 +289,7 @@ void Plotter::writeResult(const real_t frictionVelocity, const real_t welfordCou WALBERLA_ABORT("y coordinate of center to index map was not contained") } - idx = centerToIntex[globalCellCenter[1]]; + const uint_t idx = centerToIntex[globalCellCenter[1]]; cellCounterI[idx]++; if(isFlagSet(flagField->get(*cellIt), domainFlag)){ @@ -294,40 +298,50 @@ void Plotter::writeResult(const real_t frictionVelocity, const real_t welfordCou meanVelocityI[idx] += meanVelocityField->get(*cellIt, 0); for(uint_t i = 0; i < TensorField_T::F_SIZE; ++i) { - reynoldsStressVector[i * centerToIntex.size() + idx] = sosField->get(*cellIt, i) / welfordCounter; + reynoldsStressVector[i * centerToIntex.size() + idx] += sosField->get(*cellIt, i) / welfordCounter; } } } } - for(uint_t n = 0; n < centerToIntex.size(); ++n){ - meanVelocityI[n] /= real_c(cellCounterI[n]); - meanVelocityE[n] /= real_c(cellCounterE[n]); + mpi::reduceInplace(cellCounterI, mpi::SUM, 0, MPIManager::instance()->comm()); + mpi::reduceInplace(cellCounterE, mpi::SUM, 0, MPIManager::instance()->comm()); + mpi::reduceInplace(meanVelocityE, mpi::SUM, 0, MPIManager::instance()->comm()); + mpi::reduceInplace(meanVelocityI, mpi::SUM, 0, MPIManager::instance()->comm()); + mpi::reduceInplace(reynoldsStressVector, mpi::SUM, 0, MPIManager::instance()->comm()); + - for(uint_t i = 0; i < TensorField_T::F_SIZE; ++i) { - reynoldsStressVector[i * centerToIntex.size() + n] /= real_c(cellCounterE[n]); + WALBERLA_ROOT_SECTION(){ + for(uint_t n = 0; n < centerToIntex.size(); ++n){ + meanVelocityI[n] /= real_c(cellCounterI[n]); + meanVelocityE[n] /= real_c(cellCounterE[n]); + + for(uint_t i = 0; i < TensorField_T::F_SIZE; ++i) { + reynoldsStressVector[i * centerToIntex.size() + n] /= real_c(cellCounterE[n]); + } + + uPlusI[n] = meanVelocityI[n] / frictionVelocity; + uPlusE[n] = meanVelocityE[n] / frictionVelocity; } - uPlusI[n] = meanVelocityI[n] / frictionVelocity; - uPlusE[n] = meanVelocityE[n] / frictionVelocity; - } + writeData(yCellCenter.data(), centerToIntex.size(), "y"); + writeData(yRel.data(), centerToIntex.size(), "yRel"); + writeData(yPlus.data(), centerToIntex.size(), "yPlus"); + writeData(meanVelocityI.data(), centerToIntex.size(), "meanVelocityI"); + writeData(meanVelocityE.data(), centerToIntex.size(), "meanVelocityE"); + writeData(uPlusI.data(), centerToIntex.size(), "uPlusI"); + writeData(uPlusE.data(), centerToIntex.size(), "uPlusE"); + writeData(&reynoldsStressVector[0 * centerToIntex.size()], centerToIntex.size(), "uu_rms"); + writeData(&reynoldsStressVector[1 * centerToIntex.size()], centerToIntex.size(), "uv_rms"); + writeData(&reynoldsStressVector[2 * centerToIntex.size()], centerToIntex.size(), "uw_rms"); + writeData(&reynoldsStressVector[3 * centerToIntex.size()], centerToIntex.size(), "vu_rms"); + writeData(&reynoldsStressVector[4 * centerToIntex.size()], centerToIntex.size(), "vv_rms"); + writeData(&reynoldsStressVector[5 * centerToIntex.size()], centerToIntex.size(), "vw_rms"); + writeData(&reynoldsStressVector[6 * centerToIntex.size()], centerToIntex.size(), "wu_rms"); + writeData(&reynoldsStressVector[7 * centerToIntex.size()], centerToIntex.size(), "wv_rms"); + writeData(&reynoldsStressVector[8 * centerToIntex.size()], centerToIntex.size(), "ww_rms"); - writeData(yCellCenter.data(), centerToIntex.size(), "y"); - writeData(yRel.data(), centerToIntex.size(), "yRel"); - writeData(yPlus.data(), centerToIntex.size(), "yPlus"); - writeData(meanVelocityI.data(), centerToIntex.size(), "meanVelocityI"); - writeData(meanVelocityE.data(), centerToIntex.size(), "meanVelocityE"); - writeData(uPlusI.data(), centerToIntex.size(), "uPlusI"); - writeData(uPlusE.data(), centerToIntex.size(), "uPlusE"); - writeData(&reynoldsStressVector[0 * centerToIntex.size()], centerToIntex.size(), "uu_rms"); - writeData(&reynoldsStressVector[1 * centerToIntex.size()], centerToIntex.size(), "uv_rms"); - writeData(&reynoldsStressVector[2 * centerToIntex.size()], centerToIntex.size(), "uw_rms"); - writeData(&reynoldsStressVector[3 * centerToIntex.size()], centerToIntex.size(), "vu_rms"); - writeData(&reynoldsStressVector[4 * centerToIntex.size()], centerToIntex.size(), "vv_rms"); - writeData(&reynoldsStressVector[5 * centerToIntex.size()], centerToIntex.size(), "vw_rms"); - writeData(&reynoldsStressVector[6 * centerToIntex.size()], centerToIntex.size(), "wu_rms"); - writeData(&reynoldsStressVector[7 * centerToIntex.size()], centerToIntex.size(), "wv_rms"); - writeData(&reynoldsStressVector[8 * centerToIntex.size()], centerToIntex.size(), "ww_rms"); + } } void Plotter::writeData(const real_t* data, const uint_t size, const std::string& dataSetName){ diff --git a/apps/showcases/PorousMediaGPU/DragEvaluation.h b/apps/showcases/PorousMediaGPU/DragEvaluation.h index 4d621325b..a344180b4 100644 --- a/apps/showcases/PorousMediaGPU/DragEvaluation.h +++ b/apps/showcases/PorousMediaGPU/DragEvaluation.h @@ -24,6 +24,9 @@ #include "core/Filesystem.h" #include "core/math/Sample.h" #include "core/math/Vector3.h" +#include "core/mpi/Broadcast.h" +#include "core/mpi/Gatherv.h" +#include "core/mpi/Reduce.h" #include "lbm_generated/field/PdfField.h" diff --git a/apps/showcases/PorousMediaGPU/InitializerFunctions.cpp b/apps/showcases/PorousMediaGPU/InitializerFunctions.cpp index fcae3b718..6c45517a9 100644 --- a/apps/showcases/PorousMediaGPU/InitializerFunctions.cpp +++ b/apps/showcases/PorousMediaGPU/InitializerFunctions.cpp @@ -94,6 +94,41 @@ void initHemisphers(const shared_ptr< StructuredBlockStorage >& blocks, const Bl } } +void initGenericPorousMatrix(const shared_ptr< StructuredBlockStorage >& blocks, const BlockDataID flagFieldID, const Setup& setup, + const Config::BlockHandle& gpmParameters){ + + const real_t poreSize = gpmParameters.getParameter< real_t >("poreSize"); + const real_t barSize = gpmParameters.getParameter< real_t >("barSize"); + const real_t halfPoreSize = poreSize * real_c(0.5); + const real_t PoreSize2 = poreSize * real_c(2.0); + const real_t halfbarSize = barSize * real_c(0.5); + + auto domain = blocks->getDomain(); + const real_t zMin = domain.zMin(); + const real_t zMax = domain.zMax(); + + for (auto bIt = blocks->begin(); bIt != blocks->end(); ++bIt){ + Block& b = dynamic_cast< Block& >(*bIt); + const auto flagField = b.getData< FlagField_T >(flagFieldID); + const auto boundaryFlag = flagField->getOrRegisterFlag(setup.noSlipUID); + + for( auto it = flagField->beginWithGhostLayer(0); it != flagField->end(); ++it ){ + Vector3< real_t > cellCenter = blocks->getBlockLocalCellCenter( b, it.cell() ); + blocks->mapToPeriodicDomain(cellCenter); + + const real_t xc = std::round(cellCenter[0] / poreSize) * poreSize; + const real_t ys = real_c((uint_c(std::round(cellCenter[0] / poreSize)) % 2)) * poreSize; + const real_t yc = (std::round((cellCenter[1] - ys) / PoreSize2) * (PoreSize2)) + ys; + + const AABB barAABB(xc - halfbarSize, yc - halfbarSize, zMin, xc + halfbarSize, yc + halfbarSize, zMax); + if(barAABB.contains(cellCenter)) { + addFlag(flagField->get(it.x(), it.y(), it.z()), boundaryFlag); + } + + } + } +} + void setVelocityFieldsAsmuth(const shared_ptr< StructuredBlockStorage >& blocks, const Setup& setup, const IDs& ids) { diff --git a/apps/showcases/PorousMediaGPU/JinCase.prm b/apps/showcases/PorousMediaGPU/JinCase.prm index 8215f12bf..6ace3c854 100644 --- a/apps/showcases/PorousMediaGPU/JinCase.prm +++ b/apps/showcases/PorousMediaGPU/JinCase.prm @@ -1,16 +1,13 @@ Parameters { - reynoldsNumber 400; - bulkReynoldsNumber 4466; + reynoldsNumber 700; latticeVelocity 0.05; initialiseWithlatticeVelocity true; - initialiseWithTurbulence true; - forceUpdateFrequency 10; + initialiseWithTurbulence false; + forceUpdateFrequency 0; + pressureGradient 0.188; - welfordSamplingStart 200000; - welfordSamplingFrequency 1; - - timesteps 10; + timesteps 100001; processMemoryLimit 512; // MiB innerOuterSplit <1, 1, 1>; @@ -29,23 +26,19 @@ Turbulence U_c < 2.0, 0.0, 0.0 >; } -RoughWall +genericPorousMatrix { - hemispheresRadius 1; - hemispheresDistance 4; + poreSize 2; + barSize 1; } DomainSetup { - domainSize < 48.4, 40, 12 >; - cellsPerBlock < 121, 10, 30 >; - blocks < 1, 10, 1 >; - - // domainSize < 48.4, 40, 12 >; - // cellsPerBlock < 242, 20, 60 >; - // blocks < 1, 10, 1 >; + domainSize < 12, 8, 4 >; + cellsPerBlock < 960, 640, 320 >; + blocks < 1, 1, 1 >; - periodic < true, false, true >; + periodic < true, true, true >; refinementLevels 0; numberProcesses 1; // This is for load balancing, overwritten if more than one proc is used blockForestFilestem porousMediaBlockForest; @@ -54,10 +47,8 @@ DomainSetup Boundaries { - Border { direction N; walldistance -1; flag Obstacle; } - Border { direction S; walldistance -1; flag Obstacle; } -} +} StabilityChecker { @@ -68,7 +59,7 @@ StabilityChecker VTKWriter { - vtkWriteFrequency 2000; + vtkWriteFrequency 10000; vtkGhostLayers 0; velocity true; writeVelocityAsMagnitude false; @@ -92,7 +83,7 @@ Logging remainingTimeLoggerFrequency 20; // in seconds writeSurfaceMeshOfTPMS false; outputFolder output; - filestem Re400; + filestem Jin; } Probes @@ -103,6 +94,6 @@ Probes DragEvaluation { - evaluationStart 200000; - evaluationFrequency 10; + evaluationStart 0; + evaluationFrequency 0; } \ No newline at end of file diff --git a/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp b/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp index 35af02ce8..2d70f31d3 100644 --- a/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp +++ b/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp @@ -285,15 +285,11 @@ int main(int argc, char** argv){ auto allocator = make_shared< gpu::HostFieldAllocator<macroFieldType> >(); // use pinned memory allocator for faster CPU-GPU memory transfers ids.velocityField = field::addToStorage< VelocityField_T >(blocks, "vel", macroFieldType(0.0), field::fzyx, setup.numGhostLayers, allocator); - ids.meanVelocityField = field::addToStorage< VelocityField_T >(blocks, "mean velocity", macroFieldType(0.0), field::fzyx, setup.numGhostLayers, allocator); - ids.sosField = field::addToStorage< TensorField_T >(blocks, "sos velocity", macroFieldType(0.0), field::fzyx, setup.numGhostLayers, allocator); ids.densityField = field::addToStorage< ScalarField_T >(blocks, "density", macroFieldType(1.0), field::fzyx, setup.numGhostLayers, allocator); ids.flagField = field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field", setup.numGhostLayers); ids.pdfFieldGPU = lbm_generated::addGPUPdfFieldToStorage< PdfField_T >(blocks, ids.pdfField, StorageSpec, "pdfs on GPU", true); ids.velocityFieldGPU = gpu::addGPUFieldToStorage< VelocityField_T >(blocks, ids.velocityField, "velocity on GPU", true); - ids.meanVelocityFieldGPU = gpu::addGPUFieldToStorage< VelocityField_T >(blocks, ids.meanVelocityField, "mean velocity on GPU", true); - ids.sosFieldGPU = gpu::addGPUFieldToStorage< TensorField_T >(blocks, ids.sosField, "sos field on GPU", true); ids.densityFieldGPU = gpu::addGPUFieldToStorage< ScalarField_T >(blocks, ids.densityField, "velocity on GPU", true); ids.flagFieldGPU = gpu::addGPUFieldToStorage< FlagField_T >(blocks, ids.flagField, "flag on GPU", true); @@ -303,7 +299,6 @@ int main(int argc, char** argv){ const Cell innerOuterSplit = Cell(parameters.getParameter< Vector3<cell_idx_t> >("innerOuterSplit", Vector3<cell_idx_t>(1, 1, 1))); SweepCollection_T sweepCollection(blocks, ids.densityFieldGPU, ids.pdfFieldGPU, ids.velocityFieldGPU, setup.drivingForce, setup.omega, innerOuterSplit); - WelfordSweep_T welfordSweep(ids.meanVelocityFieldGPU, ids.sosFieldGPU, ids.velocityFieldGPU, real_c(0.0)); WALBERLA_LOG_INFO_ON_ROOT(++step << ". Initialising data") if(setup.initialiseWithlatticeVelocity){ @@ -330,30 +325,6 @@ int main(int argc, char** argv){ sweepCollection.initialiseBlockPointer(); gpu::fieldCpy< gpu::GPUField< VelocityField_T::value_type >, VelocityField_T >(blocks, ids.velocityFieldInflowSliceGPU, ids.velocityFieldInflowSlice); - auto inflow = std::make_shared<Inflow>(blocks, setup, ids); - auto probe = std::make_shared<Probe>(blocks, setup, ids, config->getOneBlock("Probes")); - auto plotter = std::make_shared<Plotter>(blocks, setup, ids); - - -/* - const real_t lenght = 8.0; - const real_t res = 3.0; - const real_t start = 0.0; - const uint_t nx = uint_c(std::floor(lenght / (setup.dxC * res))); - for(uint_t i = 0; i < nx; ++i){ - probe->addPoint(Vector3<real_t>(start + real_c(i) * (setup.dxC * res), 0.5, 0.5 )); - } -*/ - - const uint_t nx = blocks->getXSize() * blocks->getNumberOfXCells(); - for(uint_t i = 0; i < nx; ++i){ - probe->addPoint(Vector3<real_t>(real_c(i) * setup.dxC, finalDomain.center()[1], finalDomain.center()[2] )); - } - - probe->addPoint(finalDomain.center()); - probe->initialise(); - - ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// LB SWEEPS AND BOUNDARY HANDLING /// @@ -371,7 +342,6 @@ int main(int argc, char** argv){ setup.fluidFlag = flagField->registerFlag(setup.fluidUID); } - geometry::initBoundaryHandling< FlagField_T >(*blocks, ids.flagField, boundariesConfig); std::function< VelocityField_T::value_type (const Cell&, const Cell&, const shared_ptr< StructuredBlockForest >&, IBlock&) > wallDistanceFunctor = defaultWallDistance; if(setup.useGrid){ @@ -379,7 +349,6 @@ int main(int argc, char** argv){ initGrid(blocks, ids.flagField, setup); } - if(config->getNumBlocks("RoughWall") > 0){ WALBERLA_LOG_INFO_ON_ROOT(++step << ". Using RoughWall setup") auto wallParameters = config->getOneBlock("RoughWall"); @@ -390,13 +359,17 @@ int main(int argc, char** argv){ wallDistanceFunctor = wallDist; } + if(config->getNumBlocks("genericPorousMatrix") > 0){ + WALBERLA_LOG_INFO_ON_ROOT(++step << ". Using Generic Porous Matrix setup") + initGenericPorousMatrix(blocks, ids.flagField, setup, config->getOneBlock("genericPorousMatrix")); + } + if(config->getNumBlocks("TPMS") > 0){ WALBERLA_LOG_INFO_ON_ROOT(++step << ". Placing TPMS material") auto tpms = std::make_shared<TPMS>(config->getOneBlock("TPMS"), setup); tpms->setupBoundary(blocks, ids.flagField); } - geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, ids.flagField, setup.fluidUID, cell_idx_c(0)); BoundaryCollection_T boundaryCollection(blocks, ids.flagField, ids.pdfFieldGPU, setup.fluidUID, ids.velocityFieldInflowSliceGPU, setup.latticeVelocity, wallDistanceFunctor, ids.pdfField); @@ -408,9 +381,7 @@ int main(int argc, char** argv){ geometry::writeMesh("TPMS.obj", *mesh); } } - gpu::fieldCpy< GPUFlagField_T, FlagField_T >(blocks, ids.flagFieldGPU, ids.flagField); - auto bvc = std::make_shared<BulkVelocityCalculator>(blocks, ids, setup); ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// COMMUNICATION SCHEME /// @@ -424,13 +395,11 @@ int main(int argc, char** argv){ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// TIME STEP DEFINITIONS /// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + const int streamLowPriority = 0; auto defaultStream = gpu::StreamRAII::newPriorityStream(streamLowPriority); SweepTimeloop timeloop(blocks->getBlockStorage(), setup.timesteps); - timeloop.addFuncBeforeTimeStep(inflow->updateGPUFunctor(), "Update inflow velocity"); - timeloop.addFuncBeforeTimeStep(probe->updateFunctor(), "Update Probe Data"); - lbm_generated::BasicRecursiveTimeStepGPU< GPUPdfField_T, SweepCollection_T, BoundaryCollection_T > LBMMeshRefinement(blocks, ids.pdfFieldGPU, sweepCollection, boundaryCollection, nonUniformCommunication, nonUniformPackInfo); @@ -445,26 +414,49 @@ int main(int argc, char** argv){ } const uint_t forceUpdateFrequency = parameters.getParameter< uint_t >("forceUpdateFrequency", 0); + auto bvc = std::make_shared<BulkVelocityCalculator>(blocks, ids, setup, forceUpdateFrequency); if(forceUpdateFrequency > 0){ auto updateForce = [&](){ if(timeloop.getCurrentTimeStep() % forceUpdateFrequency == 0) { const real_t correctedDrivingForce = bvc->getDrivingForce(); - sweepCollection.setFx(correctedDrivingForce); + if(parameters.getParameter< bool >("adaptDrivingForce", false)){ + sweepCollection.setFx(correctedDrivingForce); + } + } }; timeloop.addFuncBeforeTimeStep(updateForce, "Update force"); } + std::function<void(IBlock*)> copyVelocity = gpu::fieldCpyFunctor< VelocityField_T, gpu::GPUField< VelocityField_T::value_type > >(ids.velocityField, ids.velocityFieldGPU); + std::function<void(IBlock*)> copyDensity = gpu::fieldCpyFunctor< ScalarField_T, gpu::GPUField< ScalarField_T::value_type > >(ids.densityField, ids.densityFieldGPU); + std::function<void(IBlock*)> copyMeanVelocity; + std::function<void(IBlock*)> copySOSField; + const uint_t welfordSamplingStart = parameters.getParameter< uint_t >("welfordSamplingStart", 0); const uint_t welfordSamplingFrequency = parameters.getParameter< uint_t >("welfordSamplingFrequency", 0); + std::shared_ptr<WelfordSweep_T> welfordSweep; if(welfordSamplingFrequency > 0){ + + WALBERLA_LOG_INFO_ON_ROOT(++step << ". Adding welford algorithm to evaluate mean field quantities") + + ids.meanVelocityField = field::addToStorage< VelocityField_T >(blocks, "mean velocity", macroFieldType(0.0), field::fzyx, setup.numGhostLayers, allocator); + ids.sosField = field::addToStorage< TensorField_T >(blocks, "sos velocity", macroFieldType(0.0), field::fzyx, setup.numGhostLayers, allocator); + ids.meanVelocityFieldGPU = gpu::addGPUFieldToStorage< VelocityField_T >(blocks, ids.meanVelocityField, "mean velocity on GPU", true); + ids.sosFieldGPU = gpu::addGPUFieldToStorage< TensorField_T >(blocks, ids.sosField, "sos field on GPU", true); + + copyMeanVelocity = gpu::fieldCpyFunctor< VelocityField_T, gpu::GPUField< VelocityField_T::value_type > >(ids.meanVelocityField, ids.meanVelocityFieldGPU); + copySOSField = gpu::fieldCpyFunctor< TensorField_T, gpu::GPUField< TensorField_T::value_type > >(ids.sosField, ids.sosFieldGPU); + + welfordSweep = make_shared<WelfordSweep_T>(ids.meanVelocityFieldGPU, ids.sosFieldGPU, ids.velocityFieldGPU, real_c(0.0)); + auto welfordUpdate = [&](){ if(timeloop.getCurrentTimeStep() > welfordSamplingStart && (timeloop.getCurrentTimeStep() % welfordSamplingFrequency) == 0) { - const uint_t velCtr = uint_c(welfordSweep.getCounter()); - welfordSweep.setCounter(welfordSweep.getCounter() + real_c(1.0)); + const uint_t velCtr = uint_c(welfordSweep->getCounter()); + welfordSweep->setCounter(welfordSweep->getCounter() + real_c(1.0)); for (auto& block : *blocks){ - welfordSweep.run(&block); + welfordSweep->run(&block); } }; }; @@ -472,12 +464,54 @@ int main(int argc, char** argv){ } ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - /// VTK Output /// + /// Post Processing /// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - auto copyVelocity = gpu::fieldCpyFunctor< VelocityField_T, gpu::GPUField< VelocityField_T::value_type > >(ids.velocityField, ids.velocityFieldGPU); - auto copyDensity = gpu::fieldCpyFunctor< ScalarField_T, gpu::GPUField< ScalarField_T::value_type > >(ids.densityField, ids.densityFieldGPU); - auto copyMeanVelocity = gpu::fieldCpyFunctor< VelocityField_T, gpu::GPUField< VelocityField_T::value_type > >(ids.meanVelocityField, ids.meanVelocityFieldGPU); + auto inflow = std::make_shared<Inflow>(blocks, setup, ids); + auto probe = std::make_shared<Probe>(blocks, setup, ids, config->getOneBlock("Probes")); + + +/* + const real_t lenght = 8.0; + const real_t res = 3.0; + const real_t start = 0.0; + const uint_t nx = uint_c(std::floor(lenght / (setup.dxC * res))); + for(uint_t i = 0; i < nx; ++i){ + probe->addPoint(Vector3<real_t>(start + real_c(i) * (setup.dxC * res), 0.5, 0.5 )); + } +*/ + + // const uint_t nx = blocks->getXSize() * blocks->getNumberOfXCells(); + // for(uint_t i = 0; i < nx; ++i){ + // probe->addPoint(Vector3<real_t>(real_c(i) * setup.dxC, finalDomain.center()[1], finalDomain.center()[2] )); + // } + // probe->initialise(); + + + + for(uint_t i = 0; i < blocks->getXSize() * blocks->getNumberOfXCells(); ++i){ + probe->addPoint(Vector3<real_t>(real_c(i) * setup.dxC, finalDomain.center()[1], finalDomain.center()[2] )); + } + +/* + for(uint_t i = 0; i < blocks->getYSize() * blocks->getNumberOfYCells(); ++i){ + probe->addPoint(Vector3<real_t>(finalDomain.center()[0], real_c(i) * setup.dxC, finalDomain.center()[2] )); + } + + for(uint_t i = 0; i < blocks->getZSize() * blocks->getNumberOfZCells(); ++i){ + probe->addPoint(Vector3<real_t>(finalDomain.center()[0], finalDomain.center()[1], real_c(i) * setup.dxC)); + } + +*/ + probe->initialise(); + + + timeloop.addFuncBeforeTimeStep(inflow->updateGPUFunctor(), "Update inflow velocity"); + timeloop.addFuncBeforeTimeStep(probe->updateFunctor(), "Update Probe Data"); + + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /// VTK Output /// + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // VTK auto VTKWriter = config->getOneBlock("VTKWriter"); @@ -488,6 +522,7 @@ int main(int argc, char** argv){ const real_t samplingResolution = VTKWriter.getParameter< real_t >("samplingResolution", real_c(-1.0)); const uint_t initialWriteCallsToSkip = VTKWriter.getParameter< uint_t >("initialWriteCallsToSkip", uint_c(0.0)); const real_t sliceThickness = VTKWriter.getParameter< real_t >("sliceThickness", real_c(1.0)); + const bool lastStepAsVolumeFile = VTKWriter.getParameter< bool >("lastStepAsVolumeFile", false); if (vtkWriteFrequency > 0){ const std::string identifier = setup.fileStem; @@ -501,18 +536,12 @@ int main(int argc, char** argv){ for (auto& block : *blocks){ if (VTKWriter.getParameter< bool >("velocity")){copyVelocity(&block);} if (VTKWriter.getParameter< bool >("density")){copyDensity(&block);} - if (VTKWriter.getParameter< bool >("meanVelocity")){copyMeanVelocity(&block);} + if (VTKWriter.getParameter< bool >("meanVelocity") && copyMeanVelocity){copyMeanVelocity(&block);} } - auto drivingForeceGPU = bvc->getDrivingForce(); - auto initialDrivingForce = setup.drivingForce; - - auto bulkVelocityCPU = calculateBulkVelocity(blocks, ids, setup.fluidUID); - auto blukVelocityGPU = bvc->bulkVelocity(); - WALBERLA_LOG_DEVEL_VAR(bulkVelocityCPU); - WALBERLA_LOG_DEVEL_VAR(blukVelocityGPU); - WALBERLA_LOG_DEVEL_VAR(blukVelocityGPU); - WALBERLA_LOG_DEVEL_VAR(drivingForeceGPU); + if(lastStepAsVolumeFile && timeloop.getCurrentTimeStep() == setup.timesteps - 1){ + vtkOutput->removeCellInclusionFilters(); + } }); @@ -520,6 +549,7 @@ int main(int argc, char** argv){ field::FlagFieldCellFilter<FlagField_T> fluidFilter( ids.flagField ); fluidFilter.addFlag( setup.obstacleUID ); + fluidFilter.addFlag( setup.noSlipUID ); vtkOutput->addCellExclusionFilter(fluidFilter); @@ -545,7 +575,7 @@ int main(int argc, char** argv){ vtkOutput->addCellDataWriter(velWriter); } } - if (VTKWriter.getParameter< bool >("meanVelocity", false)){ + if (VTKWriter.getParameter< bool >("meanVelocity", false) && ids.meanVelocityField){ auto meanVelWriter = make_shared< field::VTKWriter< VelocityField_T, float32 > >(ids.meanVelocityField, "meanVelocity"); vtkOutput->addCellDataWriter(meanVelWriter); @@ -606,17 +636,25 @@ int main(int argc, char** argv){ #endif WALBERLA_MPI_BARRIER() simTimer.end(); + WALBERLA_LOG_INFO_ON_ROOT("Simulation finished") + double time = double_c(simTimer.max()); - gpu::fieldCpy< TensorField_T, gpu::GPUField< VelocityField_T::value_type > >(blocks, ids.sosField, ids.sosFieldGPU); + if(copySOSField && evaluation){ + WALBERLA_CHECK_NOT_NULLPTR(welfordSweep) + WALBERLA_LOG_INFO_ON_ROOT("Evaluating mean field") + auto plotter = std::make_shared<Plotter>(blocks, setup, ids); + for (auto& block : *blocks){ + copySOSField(&block); + } - const double ut = evaluation->getMeanFrictionVelocity(); - const double welfordCounter = welfordSweep.getCounter(); - WALBERLA_LOG_DEVEL_VAR(ut) - WALBERLA_LOG_DEVEL_VAR(welfordCounter) - plotter->writeResult(ut, welfordCounter); + const double ut = evaluation->getMeanFrictionVelocity(); + const double welfordCounter = welfordSweep->getCounter(); + WALBERLA_LOG_DEVEL_VAR(ut) + WALBERLA_LOG_DEVEL_VAR(welfordCounter) + + plotter->writeResult(ut, welfordCounter); + } - WALBERLA_LOG_INFO_ON_ROOT("Simulation finished") - double time = double_c(simTimer.max()); WALBERLA_MPI_SECTION() { walberla::mpi::reduceInplace(time, walberla::mpi::MAX); } performance.logResultOnRoot(setup.timesteps, time); diff --git a/apps/showcases/PorousMediaGPU/Probe.cpp b/apps/showcases/PorousMediaGPU/Probe.cpp index 37323d336..aa06bcebc 100644 --- a/apps/showcases/PorousMediaGPU/Probe.cpp +++ b/apps/showcases/PorousMediaGPU/Probe.cpp @@ -25,35 +25,46 @@ namespace walberla{ void Probe::initialise(){ if(probePoints_.empty()){return;} - const AABB domain = blocks_->getDomain(); + uint_t idx = 0; + std::vector<Vector3<real_t>> cellCenters; for(auto p: probePoints_){ - if(!domain.contains(p)){continue;} IBlock* block = blocks_->getBlock(p); if(block != nullptr){ + const uint_t level = blocks_->getLevel(*block); + auto * flagField = block->template getData<FlagField_T>(ids_.flagField); + auto domainFlag = flagField->getFlag(setup_.fluidUID); const Cell localCell = blocks_->getBlockLocalCell(*block, p); - Vector3<real_t> localCellCenter; - blocks_->getCellCenter(localCellCenter, localCell); - pointsPerBlock_[block].emplace_back(localCellCenter); + if(isFlagSet(flagField->get(localCell), domainFlag)){ + Cell globalCell; + blocks_->transformBlockLocalToGlobalCell(globalCell, *block, localCell); + Vector3<real_t> gloablCellCenter = blocks_->getCellCenter(globalCell, level); + if(std::find(cellCenters.begin(), cellCenters.end(), gloablCellCenter) == cellCenters.end()){ + cellCenters.emplace_back(gloablCellCenter); + + globalPointsPerBlock_[block].emplace_back(std::make_pair(gloablCellCenter, idx)); + localCellsPerBlock_[block].emplace_back(localCell); + idx++; + } + } } } - uint_t totalNumberOfPoints = 0; - for (auto const& it : pointsPerBlock_){ + if(globalPointsPerBlock_.empty()){return;} + + for (auto const& it : localCellsPerBlock_){ IBlock* block = it.first; - std::vector<Vector3<real_t>> points = it.second; + auto localCells = it.second; auto * dataVector = block->getData< DataVector > ( probeDataID ); auto & cpuCellVector = dataVector->cpuCellVector(); auto & cpuDataVector = dataVector->cpuDataVector(); - const uint_t n = points.size(); - totalNumberOfPoints += n; + const uint_t n = localCells.size(); cpuCellVector.resize(3 * n); uint_t i = 0; - for(auto p: points){ - const Cell localCell = blocks_->getBlockLocalCell(*block, p); + for(const auto localCell: localCells){ cpuCellVector[i] = localCell[0]; cpuCellVector[i + n] = localCell[1]; @@ -78,7 +89,6 @@ void Probe::initialise(){ selectStart[0] = 0; writeHeaderFiles(); - WALBERLA_LOG_INFO_ON_ROOT("Added " << totalNumberOfPoints << " probes for postprocessing") } void Probe::writeHeaderFiles(){ @@ -93,37 +103,34 @@ void Probe::writeHeaderFiles(){ const auto outputFolder = setup_.outputFolder; const auto fileStem = setup_.fileStem; - const auto fileName = outputFolder + "/" + fileStem + "Probe.h5"; - filesystem::path hdfFilePath( fileName.c_str() ); + const auto probeFolderString = outputFolder + "/" + fileStem + "Probes"; + filesystem::path probeFolder( probeFolderString ); WALBERLA_ROOT_SECTION() { - if( filesystem::exists( hdfFilePath ) ) - std::remove( hdfFilePath.string().c_str() ); + if( filesystem::exists( probeFolder )) + filesystem::remove_all( probeFolder ); + + if ( !filesystem::exists( probeFolder ) ) + filesystem::create_directories( probeFolder ); } + WALBERLA_MPI_BARRIER() + + const auto fileName = probeFolderString + "/" + "ProbeRank_" + std::to_string(MPIManager::instance()->rank()) + ".h5"; + filesystem::path hdfFilePath( fileName.c_str() ); + H5::FileAccPropList fileAccProps; -#ifdef WALBERLA_BUILD_WITH_MPI - // Enable MPI-IO - H5Pset_fapl_mpio(fileAccProps.getId(), mpi::MPIManager::instance()->comm(), MPI_INFO_NULL); -#endif h5file_ = std::make_unique<H5::H5File>(hdfFilePath.c_str(), H5F_ACC_CREAT | H5F_ACC_RDWR, H5::FileCreatPropList::DEFAULT, fileAccProps); H5::Group timeGroup = h5file_->createGroup("time"); timeGroup.createDataSet("t", io::h5Float(), arrayDataSpace, props); - uint_t counter = 0; - for (auto const& it : pointsPerBlock_){ - IBlock* block = it.first; - auto* dataVector = block->getData< DataVector > ( probeDataID ); - auto & cpuCellVector = dataVector->cpuCellVector(); - const uint_t n = uint_c(dataVector->size()); - for(uint_t i = 0; i < n; ++i){ - const Cell localCell(cell_idx_c(cpuCellVector[i]), cell_idx_c(cpuCellVector[i + n]), cell_idx_c(cpuCellVector[i + 2*n])); - Vector3< real_t > p; - blocks_->getBlockLocalCellCenter(*block, localCell, p); - - std::string probeName = "Probe" + std::to_string(counter); + for (auto const& it : globalPointsPerBlock_){ + for(uint_t i = 0; i < it.second.size(); ++i){ + const auto p = it.second[i].first; + const auto idx = it.second[i].second; + std::string probeName = "Probe_" + std::to_string(idx); if (h5file_->exists(probeName)) { continue; } H5::Group probeGroup = h5file_->createGroup(probeName); @@ -137,24 +144,23 @@ void Probe::writeHeaderFiles(){ for(auto name: dataSetNames){ probeGroup.createDataSet(name, io::h5Float(), arrayDataSpace, props); } - counter++; } } } void Probe::writeProbes(){ + if(globalPointsPerBlock_.empty()){return;} extendSize[0] += executionCounter_; - - std::vector<float> tmp(executionCounter_); + std::vector<float> timeData(executionCounter_); const real_t t = (real_c(totalExecutionCounter_ + frequency_ - uint_c(1)) * setup_.dt) - ( real_c(executionCounter_ * frequency_) * setup_.dt); for(uint_t i = 0; i < executionCounter_; ++i){ - tmp[i] = float_c(t) + float_c(i)*float_c(setup_.dt); + timeData[i] = float_c(t) + float_c(i)*float_c(setup_.dt); } - writeData(tmp, "time", "t"); + writeData(timeData, "time", "t"); - for (auto const& it : pointsPerBlock_){ + for (auto const& it : globalPointsPerBlock_){ IBlock* block = it.first; auto* dataVector = block->getData< DataVector > ( probeDataID ); dataVector->syncCPU(); @@ -163,14 +169,19 @@ void Probe::writeProbes(){ const uint_t nDatasets = dataSetNames.size(); - for (auto const& it : pointsPerBlock_){ + std::vector<float> tmp(executionCounter_); + for (auto const& it : globalPointsPerBlock_){ IBlock* block = it.first; + auto* dataVector = block->getData< DataVector > ( probeDataID ); auto & cpuDataVector = dataVector->cpuDataVector(); - const uint_t n = uint_c(dataVector->size()); + const uint_t n = uint_c(dataVector->size()); + WALBERLA_CHECK_EQUAL(n, it.second.size()) for(uint_t i = 0; i < n; ++i){ - const std::string probeName = "Probe" + std::to_string(i); + const auto idx = it.second[i].second; + + const std::string probeName = "Probe_" + std::to_string(idx); for(uint_t j = 0; j < nDatasets; ++j){ auto dataSetName = dataSetNames[j]; for(uint_t k = 0; k < executionCounter_; ++k){ @@ -205,7 +216,7 @@ void Probe::update(){ const float velocityScalingFactor = float_c(setup_.conversionFactor); - for (auto const& it : pointsPerBlock_){ + for (auto const& it : globalPointsPerBlock_){ IBlock* block = it.first; auto density = block->getData< gpu::GPUField<double> >(ids_.densityFieldGPU); auto velocity = block->getData< gpu::GPUField<double> >(ids_.velocityFieldGPU); diff --git a/apps/showcases/PorousMediaGPU/Probe.h b/apps/showcases/PorousMediaGPU/Probe.h index f787233ad..374547f88 100644 --- a/apps/showcases/PorousMediaGPU/Probe.h +++ b/apps/showcases/PorousMediaGPU/Probe.h @@ -24,6 +24,8 @@ #include "core/logging/Initialization.h" #include "core/math/Constants.h" #include "core/Filesystem.h" +#include "core/mpi/Broadcast.h" +#include "core/mpi/Gatherv.h" #include "gpu/GPUWrapper.h" #include "gpu/ErrorChecking.h" @@ -140,7 +142,8 @@ class Probe const std::vector<std::string> dataSetNames{"rho", "ux", "uy", "uz"}; std::vector<Vector3<real_t>> probePoints_; - std::map<IBlock*, std::vector<Vector3<real_t>> > pointsPerBlock_; + std::map<IBlock*, std::vector< std::pair< Vector3<real_t>, uint_t > > > globalPointsPerBlock_; + std::map<IBlock*, std::vector<Cell> > localCellsPerBlock_; std::map<IBlock*, Vector3<uint32_t>> gpuBlock_; std::map<IBlock*, Vector3<uint32_t>> gpuGrid_; diff --git a/apps/showcases/PorousMediaGPU/Setup.h b/apps/showcases/PorousMediaGPU/Setup.h index d156bccc2..44ce970eb 100644 --- a/apps/showcases/PorousMediaGPU/Setup.h +++ b/apps/showcases/PorousMediaGPU/Setup.h @@ -72,6 +72,8 @@ struct Setup numberOfFourrierModes = turbulence.getParameter< uint_t >("numberOfFourrierModes"); uc = turbulence.getParameter< Vector3< real_t > >("U_c"); + const real_t speedOfSound = real_c(real_c(1.0) / std::sqrt( real_c(3.0) )); + if(config->getNumBlocks("RoughWall") > 0){ // kinViscosity = parameters.getParameter< real_t >("viscosity"); referenceLength *= real_c(0.5); @@ -84,17 +86,32 @@ struct Setup kinViscosity = referenceLength * referenceVelocity / bulkReynoldsNumber; - const real_t speedOfSound = real_c(real_c(1.0) / std::sqrt( real_c(3.0) )); machNumber = latticeVelocity / speedOfSound; omega = real_c(real_c(1.0) / (real_c(3.0) * viscosity + real_c(0.5))); frictionVelocity = (reynoldsNumber * viscosity) / latticeReferenceLength; drivingForce = calculateDrivingForce(frictionVelocity, latticeReferenceLength, latticeVelocity, latticeVelocity); } + else if(config->getNumBlocks("genericPorousMatrix") > 0){ + auto gpm = config->getOneBlock("genericPorousMatrix"); + + referenceLength = gpm.getParameter< real_t >("barSize"); + const real_t latticeReferenceLength = referenceLength / coarseMeshSize; + viscosity = latticeReferenceLength * latticeVelocity / reynoldsNumber; + referenceVelocity = latticeVelocity; + dt = latticeVelocity / referenceVelocity * coarseMeshSize; + kinViscosity = referenceLength * referenceVelocity / reynoldsNumber; + machNumber = latticeVelocity / speedOfSound; + omega = real_c(real_c(1.0) / (real_c(3.0) * viscosity + real_c(0.5))); + + const real_t pressureGradient = parameters.getParameter< real_t >("pressureGradient"); + const real_t XYslice = real_c(rootBlocks[0] * cellsPerBlock[0] * rootBlocks[1] * cellsPerBlock[1]); + drivingForce = pressureGradient / XYslice; + } else{ referenceVelocity = parameters.getParameter< real_t >("referenceVelocity"); const real_t latticeReferenceLength = referenceLength / coarseMeshSize; - const real_t speedOfSound = real_c(real_c(1.0) / std::sqrt( real_c(3.0) )); + machNumber = latticeVelocity / speedOfSound; viscosity = real_c((latticeVelocity * latticeReferenceLength) / reynoldsNumber); omega = real_c(real_c(1.0) / (real_c(3.0) * viscosity + real_c(0.5))); diff --git a/apps/showcases/PorousMediaGPU/TPMS.prm b/apps/showcases/PorousMediaGPU/TPMS.prm index 019bd4890..abae5ca9e 100644 --- a/apps/showcases/PorousMediaGPU/TPMS.prm +++ b/apps/showcases/PorousMediaGPU/TPMS.prm @@ -5,9 +5,12 @@ Parameters referenceVelocity 1.0; referenceLength 1.0; latticeVelocity 0.025; - initialiseWithlatticeVelocity true; + initialiseWithlatticeVelocity false; - timesteps 100001; + welfordSamplingStart 0; + welfordSamplingFrequency 1; + + timesteps 21; processMemoryLimit 512; // MiB innerOuterSplit <1, 1, 1>; @@ -41,9 +44,9 @@ TPMS DomainSetup { domainSize < 8, 1, 1 >; - // cellsPerBlock < 384, 48, 48 >; + cellsPerBlock < 384, 48, 48 >; // cellsPerBlock < 768, 96, 96 >; - cellsPerBlock < 1536, 192, 192 >; + // cellsPerBlock < 1536, 192, 192 >; blocks < 1, 1, 1 >; periodic < false, true, true >; refinementLevels 0; @@ -68,10 +71,11 @@ StabilityChecker VTKWriter { - vtkWriteFrequency 50000; + vtkWriteFrequency 10; vtkGhostLayers 0; velocity true; writeVelocityAsMagnitude false; + meanVelocity true; density false; flag false; writeOnlySlice true; @@ -81,6 +85,7 @@ VTKWriter oneFilePerProcess false; samplingResolution -1; initialWriteCallsToSkip 0; + lastStepAsVolumeFile true; } Logging @@ -90,6 +95,7 @@ Logging readSetupFromFile false; remainingTimeLoggerFrequency 20; // in seconds writeSurfaceMeshOfTPMS false; + filestem TPMS; } Probes diff --git a/apps/showcases/PorousMediaGPU/roughWall.prm b/apps/showcases/PorousMediaGPU/roughWall.prm index 8215f12bf..bf9a0f5f5 100644 --- a/apps/showcases/PorousMediaGPU/roughWall.prm +++ b/apps/showcases/PorousMediaGPU/roughWall.prm @@ -6,6 +6,7 @@ Parameters initialiseWithlatticeVelocity true; initialiseWithTurbulence true; forceUpdateFrequency 10; + adaptDrivingForce false; welfordSamplingStart 200000; welfordSamplingFrequency 1; diff --git a/apps/showcases/PorousMediaGPU/utilGPU.cu b/apps/showcases/PorousMediaGPU/utilGPU.cu index c1c6f47cc..741cb6688 100644 --- a/apps/showcases/PorousMediaGPU/utilGPU.cu +++ b/apps/showcases/PorousMediaGPU/utilGPU.cu @@ -18,7 +18,7 @@ // //====================================================================================================================== #include "utilGPU.h" -// #include <curand_kernel.h> +#include <curand_kernel.h> namespace walberla { @@ -47,7 +47,7 @@ static __device__ void singleMode(double* rX, double* rY, double* rZ, const doub { const real_t pi2 = 6.28318530717958647693; -/* + curandStatePhilox4_32_10_t state; curand_init(42, linear_index, 0, &state); @@ -60,8 +60,8 @@ static __device__ void singleMode(double* rX, double* rY, double* rZ, const doub const real_t zetaX = real_t(2.0) * real_t(r1.w) - real_t(1.0); const real_t zetaY = real_t(2.0) * real_t(r2.x) - real_t(1.0); const real_t zetaZ = real_t(2.0) * real_t(r2.y) - real_t(1.0); -*/ +/* // For debugging const real_t a = 0.1; // math::realRandom(real_c(0.0), real_c(1.0)); const real_t phi = 0.2 * pi2; //math::realRandom(real_c(0.0), real_c(1.0)) * pi2; @@ -69,7 +69,7 @@ static __device__ void singleMode(double* rX, double* rY, double* rZ, const doub const real_t zetaX = -0.4; //math::realRandom(real_c(-1.0), real_c(1.0)); const real_t zetaY = -0.6; //math::realRandom(real_c(-1.0), real_c(1.0)); const real_t zetaZ = 0.8; // math::realRandom(real_c(-1.0), real_c(1.0)); - +*/ const real_t theta = acos(real_t(2.0) * a - real_t(1.0)); @@ -236,8 +236,6 @@ void updateVelocityPlane(gpu::GPUField<real_t>* velocityGPU, real_t* kappa, real const real_t dx, const real_t t, const uint_t n, const real_t latticeVelocity, const int64_t nGhostLayers, const Vector3<uint32_t>& block, const Vector3<uint32_t>& grid){ - WALBERLA_ABORT("Turbulent Inflow not usuable at the moment") - dim3 block_(block[0], block[1], block[2]); dim3 grid_(grid[0], grid[1], grid[2]); @@ -326,7 +324,7 @@ void BulkVelocityCalculator::calculateBulkVelocity() { bulkVelocity += bulkVelocityPerBlock; } - mpi::allReduceInplace< double >(bulkVelocity, mpi::SUM); + mpi::allReduceInplace< double >(bulkVelocity, mpi::SUM, MPIManager::instance()->comm()); bulkVelocity_ = double_c(bulkVelocity) / volume_; WALBERLA_GPU_CHECK(gpuMemset(resultGPU_, 0.0, sizeof(double) * resultCPU_.size())); diff --git a/apps/showcases/PorousMediaGPU/utilGPU.h b/apps/showcases/PorousMediaGPU/utilGPU.h index 7fff9bffc..1d1bbc6c7 100644 --- a/apps/showcases/PorousMediaGPU/utilGPU.h +++ b/apps/showcases/PorousMediaGPU/utilGPU.h @@ -23,6 +23,8 @@ #include "core/DataTypes.h" #include "core/math/Constants.h" +#include "core/Filesystem.h" +#include "core/mpi/Reduce.h" #include "gpu/GPUWrapper.h" #include "gpu/ErrorChecking.h" @@ -47,8 +49,8 @@ void updateProbe(gpu::GPUField<real_t>* density, gpu::GPUField<real_t>* velocity class BulkVelocityCalculator { public: - BulkVelocityCalculator(const std::shared_ptr<StructuredBlockForest> & blocks, const IDs& ids, const Setup& setup) - : blocks_(blocks), setup_(setup), ids_(ids) + BulkVelocityCalculator(const std::shared_ptr<StructuredBlockForest> & blocks, const IDs& ids, const Setup& setup, const uint_t forceUpdateFrequency) + : blocks_(blocks), setup_(setup), ids_(ids), forceUpdateFrequency_(forceUpdateFrequency), executionCounter_{0} { #if __CUDA_ARCH__ < 600 WALBERLA_LOG_WARNING_ON_ROOT("Warning atomic add in double precision not supported. Falling back to software implementation") @@ -75,14 +77,30 @@ public: const Vector3<uint_t> s(blocks_->getNumberOfXCellsPerBlock(), blocks_->getNumberOfYCellsPerBlock(), blocks_->getNumberOfZCellsPerBlock()); getBlockAndGridSize(s, block_, grid_); - mpi::allReduceInplace< double >(volume_, mpi::SUM); - - + mpi::allReduceInplace< double >(volume_, mpi::SUM, MPIManager::instance()->comm()); + resultCPU_.resize(nBlocks); WALBERLA_GPU_CHECK(gpuMalloc((void **)&resultGPU_, sizeof(double) * resultCPU_.size() )); WALBERLA_GPU_CHECK(gpuMemset(resultGPU_, 0.0, sizeof(double) * resultCPU_.size())); + const auto outputFolder = setup_.outputFolder; + const auto fileStem = setup_.fileStem; + fileName_ = outputFolder + "/" + fileStem + "DrivingForce.csv"; + filesystem::path drivingForceOuputFile( fileName_.c_str() ); + + WALBERLA_ROOT_SECTION() + { + if( filesystem::exists( drivingForceOuputFile ) ) + std::remove( drivingForceOuputFile.string().c_str() ); + + std::ofstream outfile( drivingForceOuputFile.c_str() ); + outfile << "SEP=," << "\n"; + + outfile << "timestep," << "force," << "U_b" << "\n"; + outfile.close(); + } + } double bulkVelocity() const { return bulkVelocity_; } @@ -93,7 +111,18 @@ public: calculateBulkVelocity(); const real_t latticeReferenceLength = setup_.referenceLength / setup_.dxC; - return calculateDrivingForce(setup_.frictionVelocity, latticeReferenceLength, setup_.latticeVelocity, bulkVelocity_); + const real_t drivingForce = calculateDrivingForce(setup_.frictionVelocity, latticeReferenceLength, setup_.latticeVelocity, bulkVelocity_); + const real_t timestep = real_c(executionCounter_ * forceUpdateFrequency_) * setup_.dt; + + WALBERLA_ROOT_SECTION() + { + std::ofstream outfile( fileName_.c_str(), std::ios_base::app ); + outfile << timestep << "," << drivingForce << "," << bulkVelocity_ << "\n"; + outfile.close(); + } + + executionCounter_++; + return drivingForce; } private: @@ -101,6 +130,10 @@ private: const Setup setup_; const IDs ids_; + const uint_t forceUpdateFrequency_; + uint_t executionCounter_; + std::string fileName_; + double volume_; double bulkVelocity_{0.0}; -- GitLab