From 0ac31586a2189878b99769dba93bd9e3272da568 Mon Sep 17 00:00:00 2001 From: Markus Holzer <markus.holzer@onera.fr> Date: Wed, 16 Apr 2025 16:24:36 +0200 Subject: [PATCH] [skip ci] update --- .../showcases/PorousMediaGPU/DragEvaluation.h | 146 ++++++++++++++++++ apps/showcases/PorousMediaGPU/JinCase.prm | 14 +- .../PorousMediaGPU/JinCaseInflow.prm | 11 +- .../PorousMediaGPU/PorousMediaGPU.cpp | 85 ++++++---- apps/showcases/PorousMediaGPU/Probe.h | 1 + apps/showcases/PorousMediaGPU/Setup.h | 3 + apps/showcases/PorousMediaGPU/TPMS.h | 2 - .../showcases/PorousMediaGPU/TPMSPeriodic.prm | 39 +++-- apps/showcases/PorousMediaGPU/roughWall.prm | 35 ++++- apps/showcases/PorousMediaGPU/utilGPU.cu | 41 +++-- apps/showcases/PorousMediaGPU/utilGPU.h | 45 ++++-- 11 files changed, 337 insertions(+), 85 deletions(-) diff --git a/apps/showcases/PorousMediaGPU/DragEvaluation.h b/apps/showcases/PorousMediaGPU/DragEvaluation.h index 9b57b4958..0c9a2661e 100644 --- a/apps/showcases/PorousMediaGPU/DragEvaluation.h +++ b/apps/showcases/PorousMediaGPU/DragEvaluation.h @@ -68,6 +68,10 @@ class DragEvaluation return (sumFx_ / double_c(meanQuantitiesCounter_)) / totalSurface_; } + void setTotalExecutionCounter(const uint_t counter){ + totalExecutionCounter_ = counter * ( 1 << setup_.refinementLevels ); + } + protected: bool initialized_{false}; @@ -135,4 +139,146 @@ class Plotter uint_t checkFrequency_{10000}; }; // class Plotter + +class PressureDropEvaluator +{ + public: + PressureDropEvaluator(const shared_ptr< StructuredBlockForest >& blocks, const Setup& setup, const IDs& ids, const Config::BlockHandle& pressureParameters) + : blocks_(blocks), setup_(setup), ids_(ids) + { + + evalAABB_ = pressureParameters.getParameter< AABB >("evaluationAABB"); + evalInterval_ = pressureParameters.getParameter< uint_t >("evaluationFrequency"); + + pressureDrop_ = real_c(0.0); + executionCounter_ = uint_c(0); + + fileName_ = setup_.outputFolder + "/" + setup_.fileStem + "PressureDrop.csv"; + filesystem::path pressureDropOuputFile( fileName_.c_str() ); + + WALBERLA_ROOT_SECTION() + { + if(!setup_.loadSnapshot){ + if( filesystem::exists( pressureDropOuputFile ) ) + std::remove( pressureDropOuputFile.string().c_str() ); + } + + if(!filesystem::exists( pressureDropOuputFile ) ){ + std::ofstream outfile( pressureDropOuputFile.c_str() ); + outfile << "SEP=," << "\n"; + + outfile << "timestep," << "time," << "pressureDrop," << "x0," << "x1" << "\n"; + outfile.close(); + } + } + } + + std::function<void()> updateFunctor() {return [this](){ updatePressureDrop(); };} + void setInitialTimestep(const uint_t initialTimestep){ + executionCounter_ = initialTimestep; + } + + + + private: + const shared_ptr< StructuredBlockForest > blocks_; + Setup setup_; + IDs ids_; + AABB evalAABB_; + uint_t evalInterval_; + real_t pressureDrop_; + + uint_t executionCounter_; // number of times operator() has been called + std::string fileName_; + + void syncDensity(){ + gpu::fieldCpy< ScalarField_T , gpu::GPUField< ScalarField_T::value_type > >(blocks_, ids_.densityField, ids_.densityFieldGPU); + } + + void updatePressureDrop(){ + + ++executionCounter_; + if (executionCounter_ % evalInterval_ != uint_c(0)) { return; } + + syncDensity(); + + real_t lowerDensity = real_c(0); + real_t upperDensity = real_c(0); + uint_t numCellsLowerSlice = uint_c(0); + uint_t numCellsUpperSlice = uint_c(0); + + auto domainFlag = setup_.fluidFlag; + + for (auto blockIterator = blocks_->begin(); blockIterator != blocks_->end(); ++blockIterator) + { + const uint_t level = blocks_->getLevel(*blockIterator); + + // global lower cell BB slice for density measurement (shifted 1 cell away from porous medium in + // z-direction) + CellInterval lowerCellBB = blocks_->getCellBBFromAABB(evalAABB_, level); + lowerCellBB.xMax() = lowerCellBB.xMin() + cell_idx_c(1); + // lowerCellBB.xMin() = lowerCellBB.xMin(); + numCellsLowerSlice = lowerCellBB.numCells(); + + // global upper cell BB slice for density measurement (shifted 1 cell away from porous medium in + // z-direction) + CellInterval upperCellBB = blocks_->getCellBBFromAABB(evalAABB_, level); + upperCellBB.xMin() = upperCellBB.xMax() - cell_idx_c(1); + // upperCellBB.xMax() = upperCellBB.xMin(); + numCellsUpperSlice = upperCellBB.numCells(); + + // block's cell BB that intersects the global slices (still in global coordinates) + CellInterval lowerBlockCellBB = blocks_->getBlockCellBB(*blockIterator); + lowerBlockCellBB.intersect(lowerCellBB); + CellInterval upperBlockCellBB = blocks_->getBlockCellBB(*blockIterator); + upperBlockCellBB.intersect(upperCellBB); + + // transform the global coordinates of relevant cells to block local coordinates + CellInterval lowerBlockLocalCellBB; + CellInterval upperBlockLocalCellBB; + blocks_->transformGlobalToBlockLocalCellInterval(lowerBlockLocalCellBB, *blockIterator, lowerBlockCellBB); + blocks_->transformGlobalToBlockLocalCellInterval(upperBlockLocalCellBB, *blockIterator, upperBlockCellBB); + + auto densityField = blockIterator->template getData< ScalarField_T >(ids_.densityField); + auto flagField = blockIterator->template getData< FlagField_T >(ids_.flagField); + + // sum density of relevant cells + // clang-format off + WALBERLA_FOR_ALL_CELLS_IN_INTERVAL_XYZ(lowerBlockLocalCellBB, + if (isFlagSet(flagField->get(x, y, z), domainFlag)) + { + lowerDensity += densityField->get(x, y, z); + } + ) // WALBERLA_FOR_ALL_CELLS_IN_INTERVAL_XYZ + + WALBERLA_FOR_ALL_CELLS_IN_INTERVAL_XYZ(upperBlockLocalCellBB, + if (isFlagSet(flagField->get(x, y, z), domainFlag)) + { + upperDensity += densityField->get(x, y, z); + } + ) // WALBERLA_FOR_ALL_CELLS_IN_INTERVAL_XYZ + // clang-format on + } + + mpi::allReduceInplace< real_t >(lowerDensity, mpi::SUM); + mpi::allReduceInplace< real_t >(upperDensity, mpi::SUM); + + // average density + lowerDensity /= real_c(numCellsLowerSlice); + upperDensity /= real_c(numCellsUpperSlice); + + pressureDrop_ = (upperDensity - lowerDensity) / real_c(3.0); + const real_t timestep = real_c(executionCounter_) * setup_.dt; + + + WALBERLA_ROOT_SECTION(){ + std::ofstream outfile( fileName_.c_str(), std::ios_base::app ); + outfile << std::setprecision(16); + outfile << executionCounter_ << "," << timestep << "," << pressureDrop_ << "," << evalAABB_.xMin() << "," << evalAABB_.xMax() << "\n"; + outfile.close(); + } + + } +}; // class PressureDropEvaluator + } \ No newline at end of file diff --git a/apps/showcases/PorousMediaGPU/JinCase.prm b/apps/showcases/PorousMediaGPU/JinCase.prm index dd57dc225..2984378f2 100644 --- a/apps/showcases/PorousMediaGPU/JinCase.prm +++ b/apps/showcases/PorousMediaGPU/JinCase.prm @@ -6,7 +6,7 @@ Parameters initialiseWithTurbulence false; pressureGradient 0.190; - timesteps 5000001; + timesteps 11; processMemoryLimit 512; // MiB innerOuterSplit <1, 1, 1>; @@ -17,8 +17,8 @@ Parameters Snapshot { - loadSnapshot false; - storeSnapshot true; + loadSnapshot true; + storeSnapshot false; recreateBlockForest true; snapshotFrequency 10; snapshotBaseFolder snapshots; @@ -28,6 +28,8 @@ ForcingParameters { forceUpdateFrequency 1000; adaptDrivingForce false; + writeFrequency 100; + targetBulkVelocity 0.04; proportionalGain 2e-4; derivativeGain 1e-6; integralGain 2e-4; @@ -54,9 +56,9 @@ genericPorousMatrix DomainSetup { domainSize < 12, 8, 4 >; - cellsPerBlock < 360, 240, 120 >; // cellsPerBlock < 360, 240, 120 >; - blocks < 2, 2, 2 >; + cellsPerBlock < 120, 160, 80 >; + blocks < 2, 1, 1 >; periodic < true, true, true >; refinementLevels 0; @@ -79,7 +81,7 @@ StabilityChecker VTKWriter { - vtkWriteFrequency 100000; + vtkWriteFrequency 10; vtkGhostLayers 0; velocity true; writeVelocityAsMagnitude false; diff --git a/apps/showcases/PorousMediaGPU/JinCaseInflow.prm b/apps/showcases/PorousMediaGPU/JinCaseInflow.prm index 1a14df767..74ba0e0be 100644 --- a/apps/showcases/PorousMediaGPU/JinCaseInflow.prm +++ b/apps/showcases/PorousMediaGPU/JinCaseInflow.prm @@ -6,7 +6,7 @@ Parameters initialiseWithTurbulence false; pressureGradient 0.190; - timesteps 100001; + timesteps 101; processMemoryLimit 512; // MiB innerOuterSplit <1, 1, 1>; @@ -19,7 +19,7 @@ Snapshot { loadSnapshot false; storeSnapshot true; - snapshotFrequency 100000; + snapshotFrequency 100; snapshotBaseFolder snapshots; } @@ -42,9 +42,10 @@ genericPorousMatrix DomainSetup { domainSize < 12, 8, 4 >; - cellsPerBlock < 360, 240, 120 >; // cellsPerBlock < 360, 240, 120 >; - blocks < 2, 2, 2 >; + cellsPerBlock < 240, 160, 80 >; + // blocks < 2, 2, 2 >; + blocks < 1, 1, 1 >; periodic < false, true, true >; refinementLevels 0; @@ -68,7 +69,7 @@ StabilityChecker VTKWriter { - vtkWriteFrequency 100000; + vtkWriteFrequency 100; vtkGhostLayers 0; velocity true; writeVelocityAsMagnitude false; diff --git a/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp b/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp index d2d09661e..873e0c112 100644 --- a/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp +++ b/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp @@ -299,6 +299,8 @@ int main(int argc, char** argv){ WALBERLA_LOG_INFO_ON_ROOT(" - beginTimeStep = " << beginTimeStep) } + setup.timesteps += beginTimeStep; + const StorageSpecification_T StorageSpec = StorageSpecification_T(); // Creating fields uint_t step = 0; @@ -384,10 +386,10 @@ int main(int argc, char** argv){ gpu::fieldCpy< gpu::GPUField< VelocityField_T::value_type >, VelocityField_T >(blocks, ids.velocityFieldInflowSliceGPU, ids.velocityFieldInflowSlice); } - else{ - for (auto& block : *blocks) - sweepCollection.calculateMacroscopicParameters(&block, cell_idx_c(setup.numGhostLayers - 1)); - } + // else{ + // for (auto& block : *blocks) + // sweepCollection.calculateMacroscopicParameters(&block, cell_idx_c(setup.numGhostLayers - 1)); + // } ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// LB SWEEPS AND BOUNDARY HANDLING /// @@ -455,6 +457,23 @@ int main(int argc, char** argv){ auto nonUniformPackInfo = lbm_generated::setupNonuniformGPUPdfCommunication< GPUPdfField_T >(blocks, ids.pdfFieldGPU); nonUniformCommunication->addPackInfo(nonUniformPackInfo); + if(setup.loadSnapshot){ + const uint_t maxLevel = blocks->getDepth(); + for (uint_t level = 0; level <= maxLevel; level++){ + nonUniformCommunication->communicateEqualLevel(level); + if(level + 1 <= maxLevel){ + nonUniformCommunication->communicateCoarseToFine(level + 1); + nonUniformCommunication->communicateFineToCoarse(level + 1); + } + } + } + + WALBERLA_MPI_BARRIER() +#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT) + WALBERLA_GPU_CHECK(gpuDeviceSynchronize()) + WALBERLA_GPU_CHECK(gpuPeekAtLastError()) +#endif + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// TIME STEP DEFINITIONS /// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -462,6 +481,7 @@ int main(int argc, char** argv){ const int streamLowPriority = 0; auto defaultStream = gpu::StreamRAII::newPriorityStream(streamLowPriority); SweepTimeloop timeloop(blocks->getBlockStorage(), setup.timesteps); + timeloop.setCurrentTimeStep(beginTimeStep); lbm_generated::BasicRecursiveTimeStepGPU< GPUPdfField_T, SweepCollection_T, BoundaryCollection_T > LBMMeshRefinement(blocks, ids.pdfFieldGPU, sweepCollection, boundaryCollection, nonUniformCommunication, nonUniformPackInfo); @@ -473,6 +493,7 @@ int main(int argc, char** argv){ if(config->getNumBlocks("RoughWall") > 0){ WALBERLA_LOG_INFO_ON_ROOT(++step << ". Setup Drag Evaluation") evaluation = make_shared<DragEvaluation>(blocks, boundaryCollection, ids, setup, config); + evaluation->setTotalExecutionCounter(beginTimeStep); LBMMeshRefinement.addPostBoundaryHandlingBlockFunction(evaluation->forceCalculationFunctor()); } @@ -483,18 +504,26 @@ int main(int argc, char** argv){ if(bvc->forceUpdateFrequency() > 0){ auto updateForce = [&](){ - if(timeloop.getCurrentTimeStep() % bvc->forceUpdateFrequency() == 0) { + if(timeloop.getCurrentTimeStep() % bvc->forceUpdateFrequency() == 0) { const real_t correctedDrivingForce = bvc->getDrivingForce(); - if(bvc->adaptDrivingForce()){ - WALBERLA_LOG_DEVEL_VAR(correctedDrivingForce) - sweepCollection.setFx(correctedDrivingForce); - } + sweepCollection.setFx(correctedDrivingForce); } }; timeloop.addFuncBeforeTimeStep(updateForce, "Update force"); } } + shared_ptr< PressureDropEvaluator > pde; + if(config->getNumBlocks("PressureEvaluation") > 0){ + WALBERLA_LOG_INFO_ON_ROOT(++step << ". Add pressure drop calculator") + + pde = std::make_shared<PressureDropEvaluator>(blocks, setup, ids, config->getOneBlock("PressureEvaluation")); + pde->setInitialTimestep(beginTimeStep); + timeloop.addFuncBeforeTimeStep(pde->updateFunctor(), "Pressure Drop Evaluation"); + + } + + std::function<void(IBlock*)> copyPDFs = gpu::fieldCpyFunctor< PdfField_T, gpu::GPUField< PdfField_T::value_type > >(ids.pdfField, ids.pdfFieldGPU); std::function<void(IBlock*)> copyVelocityInflow = gpu::fieldCpyFunctor< VelocityField_T, gpu::GPUField< VelocityField_T::value_type > >(ids.velocityFieldInflowSlice, ids.velocityFieldInflowSliceGPU); std::function<void(IBlock*)> copyVelocity = gpu::fieldCpyFunctor< VelocityField_T, gpu::GPUField< VelocityField_T::value_type > >(ids.velocityField, ids.velocityFieldGPU); @@ -541,29 +570,23 @@ int main(int argc, char** argv){ auto probe = std::make_shared<Probe>(blocks, setup, ids, config->getOneBlock("Probes")); probe->setInitialTimestep(beginTimeStep); -/* - const real_t lenght = 8.0; - const real_t res = 3.0; - const real_t start = 0.0; - const uint_t nx = uint_c(std::floor(lenght / (setup.dxC * res))); - for(uint_t i = 0; i < nx; ++i){ - probe->addPoint(Vector3<real_t>(start + real_c(i) * (setup.dxC * res), 0.5, 0.5 )); - } -*/ - - // const uint_t nx = blocks->getXSize() * blocks->getNumberOfXCells(); - // for(uint_t i = 0; i < nx; ++i){ - // probe->addPoint(Vector3<real_t>(real_c(i) * setup.dxC, finalDomain.center()[1], finalDomain.center()[2] )); - // } - // probe->initialise(); probe->addPoint(finalDomain.center()); + const real_t xStart = finalDomain.xMin(); + const uint_t nx = uint_c(finalDomain.xSize() / setup.dxC); + for(uint_t i = 0; i <= nx; i+=4){ + probe->addPoint(Vector3<real_t>(xStart + real_c(i) * setup.dxC, finalDomain.center()[1], finalDomain.center()[2] )); + } + +/* const real_t xStart = finalDomain.xMin(); const uint_t spacing = uint_c(4); const uint_t nx = uint_c(finalDomain.xSize() / setup.dxC); - for(uint_t j = 1; j < 2; ++j){ + for(uint_t j = 0; j < 3; ++j){ + const Vector3<real_t> lineCenter(finalDomain.center()[0], real_c(2.0) + real_c(j) * real_c(2.0), finalDomain.center()[2]); + probe->addPoint(lineCenter); for(uint_t i = 0; i <= nx; i+=spacing){ probe->addPoint(Vector3<real_t>(xStart + real_c(i) * setup.dxC, real_c(2.0) + real_c(j) * real_c(2.0), finalDomain.center()[2] )); } @@ -572,16 +595,15 @@ int main(int argc, char** argv){ const real_t yStart = finalDomain.yMin(); const uint_t ny = uint_c(finalDomain.ySize() / setup.dxC); - for(uint_t j = 2; j < 3; ++j){ + for(uint_t j = 0; j < 5; ++j){ + const Vector3<real_t> lineCenter(real_c(2.0) + real_c(j) * real_c(2.0), finalDomain.center()[1], finalDomain.center()[2]); + probe->addPoint(lineCenter); for(uint_t i = 0; i <= ny; i+=spacing){ probe->addPoint(Vector3<real_t>(real_c(2.0) + real_c(j) * real_c(2.0), yStart + real_c(i) * setup.dxC, finalDomain.center()[2] )); } } - // for(uint_t i = 0; i < blocks->getYSize() * blocks->getNumberOfYCells(); ++i){ - // probe->addPoint(Vector3<real_t>(finalDomain.center()[0], real_c(i) * setup.dxC, finalDomain.center()[2] )); - // } - +*/ probe->initialise(); @@ -695,6 +717,7 @@ int main(int argc, char** argv){ if(setup.storeSnapshot){ auto snapShotFunction = [&](){ if(timeloop.getCurrentTimeStep() > uint_c(0) && ( (timeloop.getCurrentTimeStep() + 1) % setup.snapshotFrequency) == 0) { + WALBERLA_LOG_INFO_ON_ROOT("Writing Snapshot") for (auto& block : *blocks){ copyPDFs(&block); @@ -702,7 +725,7 @@ int main(int argc, char** argv){ if(copyMeanVelocity){copyMeanVelocity(&block);} if(copySOSField){copySOSField(&block);} } - + blocks->saveBlockData(setup.pdfFieldFile + "_tmp", ids.pdfField); blocks->saveBlockData(setup.inflowFile + "_tmp", ids.velocityFieldInflowSlice); diff --git a/apps/showcases/PorousMediaGPU/Probe.h b/apps/showcases/PorousMediaGPU/Probe.h index 4bf483853..7534da555 100644 --- a/apps/showcases/PorousMediaGPU/Probe.h +++ b/apps/showcases/PorousMediaGPU/Probe.h @@ -133,6 +133,7 @@ class Probe void update(); void setInitialTimestep(const uint_t initialTimestep){ totalExecutionCounter_ = initialTimestep; + rampUpTime_ += initialTimestep; } diff --git a/apps/showcases/PorousMediaGPU/Setup.h b/apps/showcases/PorousMediaGPU/Setup.h index 950234c94..8749aa716 100644 --- a/apps/showcases/PorousMediaGPU/Setup.h +++ b/apps/showcases/PorousMediaGPU/Setup.h @@ -124,6 +124,9 @@ struct Setup kinViscosity = viscosity * coarseMeshSize * coarseMeshSize / dt; } + if( parameters.isDefined("drivingForce") ) + drivingForce = parameters.getParameter< real_t >("drivingForce"); + omegaF = lbm_generated::relaxationRateScaling(omega, refinementLevels); rho = real_c(1.0); diff --git a/apps/showcases/PorousMediaGPU/TPMS.h b/apps/showcases/PorousMediaGPU/TPMS.h index 32c3d5afc..e72a72a7e 100644 --- a/apps/showcases/PorousMediaGPU/TPMS.h +++ b/apps/showcases/PorousMediaGPU/TPMS.h @@ -56,8 +56,6 @@ class TPMS tpmsBeginn_ = tpmsParameters.getParameter< real_t >("start"); tpmsEnd_ = tpmsParameters.getParameter< real_t >("end"); - WALBERLA_LOG_DEVEL_VAR(waveNumber_) - const std::string tpmsString = tpmsParameters.getParameter< std::string >("type"); if (tpmsString.compare("gyroid") == 0){ diff --git a/apps/showcases/PorousMediaGPU/TPMSPeriodic.prm b/apps/showcases/PorousMediaGPU/TPMSPeriodic.prm index bb947e407..ed3599bf7 100644 --- a/apps/showcases/PorousMediaGPU/TPMSPeriodic.prm +++ b/apps/showcases/PorousMediaGPU/TPMSPeriodic.prm @@ -6,11 +6,12 @@ Parameters referenceLength 1.0; latticeVelocity 0.025; initialiseWithlatticeVelocity false; + drivingForce 1e-8; welfordSamplingStart 0; welfordSamplingFrequency 1; - timesteps 10; + timesteps 101; processMemoryLimit 512; // MiB innerOuterSplit <1, 1, 1>; @@ -23,14 +24,15 @@ Snapshot { loadSnapshot true; storeSnapshot true; - snapshotFrequency 10; + snapshotFrequency 100; snapshotBaseFolder snapshots; } ForcingParameters { - forceUpdateFrequency 0; + forceUpdateFrequency 25; adaptDrivingForce false; + writeFrequency 1; // targetMeanVelocityMagnitude 0.02; proportionalGain 2e-4; derivativeGain 1e-6; @@ -38,6 +40,12 @@ ForcingParameters maxRamp 1e-4; } +PressureEvaluation +{ + evaluationAABB [ <0, 0, 0>, <4.5, 1, 1> ]; + evaluationFrequency 25; +} + Turbulence { useGrid false; @@ -51,7 +59,8 @@ Turbulence TPMS { - waveNumber 18.84955592; + // waveNumber 18.8495559215; + waveNumber 6.2831853072; scaling 1.0; strut 0.75; start 0.0; @@ -62,11 +71,16 @@ TPMS //! [domainSetup] DomainSetup { + // domainSize < 1, 1, 1 >; + // cellsPerBlock < 128, 128, 128 >; + + domainSize < 9, 1, 1 >; - cellsPerBlock < 270, 30, 30 >; - // cellsPerBlock < 2700, 300, 300 >; + cellsPerBlock < 288, 32, 32 >; + // cellsPerBlock < 720, 320, 320 >; // cellsPerBlock < 768, 96, 96 >; // cellsPerBlock < 1536, 192, 192 >; + // blocks < 4, 1, 1 >; blocks < 1, 1, 1 >; periodic < true, true, true >; refinementLevels 0; @@ -77,7 +91,8 @@ DomainSetup Boundaries { - + // Border { direction W; walldistance -1; flag TurbulentInflow; } + // Border { direction E; walldistance -1; flag Outflow; } } StabilityChecker @@ -89,7 +104,7 @@ StabilityChecker VTKWriter { - vtkWriteFrequency 10; + vtkWriteFrequency 100; vtkGhostLayers 0; velocity true; writeVelocityAsMagnitude false; @@ -103,7 +118,7 @@ VTKWriter oneFilePerProcess false; samplingResolution -1; initialWriteCallsToSkip 0; - lastStepAsVolumeFile true; + lastStepAsVolumeFile false; } Logging @@ -113,12 +128,12 @@ Logging readSetupFromFile false; remainingTimeLoggerFrequency 20; // in seconds writeSurfaceMeshOfTPMS true; - filestem TPMS; + filestem TPMSPeriodic; } Probes { evaluationFrequency 1; - writeCounter 1000; // after how many evaluations the results are written to file - filename probeData.h5; + writeCounter 50; // after how many evaluations the results are written to file + rampUpTime 50; } \ No newline at end of file diff --git a/apps/showcases/PorousMediaGPU/roughWall.prm b/apps/showcases/PorousMediaGPU/roughWall.prm index bf9a0f5f5..aeaa20eab 100644 --- a/apps/showcases/PorousMediaGPU/roughWall.prm +++ b/apps/showcases/PorousMediaGPU/roughWall.prm @@ -5,13 +5,11 @@ Parameters latticeVelocity 0.05; initialiseWithlatticeVelocity true; initialiseWithTurbulence true; - forceUpdateFrequency 10; - adaptDrivingForce false; welfordSamplingStart 200000; welfordSamplingFrequency 1; - timesteps 10; + timesteps 11; processMemoryLimit 512; // MiB innerOuterSplit <1, 1, 1>; @@ -20,6 +18,27 @@ Parameters gpuEnabledMPI false; } +Snapshot +{ + loadSnapshot false; + storeSnapshot true; + snapshotFrequency 10; + snapshotBaseFolder snapshots; +} + + +ForcingParameters +{ + forceUpdateFrequency 10; + adaptDrivingForce true; + writeFrequency 100; + targetBulkVelocity 0.05; + proportionalGain 2e-4; + derivativeGain 1e-6; + integralGain 2e-4; + maxRamp 1e-4; +} + Turbulence { useGrid false; @@ -38,8 +57,8 @@ RoughWall DomainSetup { - domainSize < 48.4, 40, 12 >; - cellsPerBlock < 121, 10, 30 >; + domainSize < 40, 40, 12 >; + cellsPerBlock < 100, 10, 30 >; blocks < 1, 10, 1 >; // domainSize < 48.4, 40, 12 >; @@ -47,7 +66,7 @@ DomainSetup // blocks < 1, 10, 1 >; periodic < true, false, true >; - refinementLevels 0; + refinementLevels 1; numberProcesses 1; // This is for load balancing, overwritten if more than one proc is used blockForestFilestem porousMediaBlockForest; } @@ -104,6 +123,6 @@ Probes DragEvaluation { - evaluationStart 200000; - evaluationFrequency 10; + evaluationStart 10; + evaluationFrequency 1; } \ No newline at end of file diff --git a/apps/showcases/PorousMediaGPU/utilGPU.cu b/apps/showcases/PorousMediaGPU/utilGPU.cu index 741cb6688..c5fddfd81 100644 --- a/apps/showcases/PorousMediaGPU/utilGPU.cu +++ b/apps/showcases/PorousMediaGPU/utilGPU.cu @@ -192,22 +192,40 @@ static __global__ void evaluateProbePoints(double* const density, double* const } } -static __global__ void sumVelocity(double* result, double* const velocity, uint8_t* const flagField, const int64_t n, const double cellVolume, const uint8_t domainFlag, +static __global__ void sumVelocity(double* result, double* const velocity, uint8_t* const flagField, const int64_t n, const double cellVolume, const uint_t fluidBitNr, const int64_t xSize, const int64_t ySize, const int64_t zSize, const int64_t xStrideVelocity, const int64_t yStrideVelocity, const int64_t zStrideVelocity, const int64_t fStrideVelocity, const int64_t xStrideFlag, const int64_t yStrideFlag, const int64_t zStrideFlag) { - if(blockDim.x*blockIdx.x + threadIdx.x < xSize && blockDim.y*blockIdx.y + threadIdx.y < ySize && blockDim.z*blockIdx.z + threadIdx.z < zSize) + if(blockDim.y*blockIdx.y + threadIdx.y < ySize && blockDim.z*blockIdx.z + threadIdx.z < zSize) { - const int64_t x = blockDim.x*blockIdx.x + threadIdx.x; + + extern __shared__ double sdata[]; + + const int64_t thx = threadIdx.x; + const int64_t x = blockIdx.x * (blockDim.x * 2) + threadIdx.x; const int64_t y = blockDim.y*blockIdx.y + threadIdx.y; const int64_t z = blockDim.z*blockIdx.z + threadIdx.z; - const double uX = cellVolume * velocity[xStrideVelocity*x + yStrideVelocity*y + zStrideVelocity*z + 0*fStrideVelocity]; + // const double ux = ((flagField[xStrideFlag*x + yStrideFlag*y + zStrideFlag*z]) >> (fluidBitNr) & 1) * cellVolume * velocity[xStrideVelocity*x + yStrideVelocity*y + zStrideVelocity*z + 0*fStrideVelocity]; + // atomicAdd(&result[n], ux); + + + double firstSum = (x < xSize) ? ((flagField[xStrideFlag*x + yStrideFlag*y + zStrideFlag*z]) >> (fluidBitNr) & 1) * cellVolume * velocity[xStrideVelocity*x + yStrideVelocity*y + zStrideVelocity*z + 0*fStrideVelocity] : 0; + if (x + blockDim.x < xSize) firstSum += ((flagField[xStrideFlag*(x + blockDim.x) + yStrideFlag*y + zStrideFlag*z]) >> (fluidBitNr) & 1) * cellVolume * velocity[xStrideVelocity*(x + blockDim.x) + yStrideVelocity*y + zStrideVelocity*z + 0*fStrideVelocity]; + sdata[thx] = firstSum; + __syncthreads(); - if( ( flagField[xStrideFlag*x + yStrideFlag*y + zStrideFlag*z] & domainFlag) == domainFlag ){ - atomicAdd(&result[n], uX); + for (unsigned int s = blockDim.x / 2; s > 0; s >>= 1) { + if (thx < s) { + sdata[thx] += sdata[thx + s]; + } + __syncthreads(); } + + if (thx == 0) atomicAdd(&result[n], sdata[0]); + + } } @@ -285,6 +303,8 @@ void BulkVelocityCalculator::calculateBulkVelocity() { dim3 b(block_[0], block_[1], block_[2]); dim3 g(grid_[0], grid_[1], grid_[2]); + const uint_t sharedMemorySize = block_[0] * sizeof(double); + int64_t counter = 0; for( auto it = blocks_->begin(); it != blocks_->end(); ++it ){ auto* block = dynamic_cast< Block* >(it.get()); @@ -310,10 +330,11 @@ void BulkVelocityCalculator::calculateBulkVelocity() { const int64_t yStrideFlag = int64_c(flagField->yStride()); const int64_t zStrideFlag = int64_c(flagField->zStride()); - internal::sumVelocity<<<g, b, 0, nullptr>>>(resultGPU_, data_velocity, data_flagField, counter, cellVolume, setup_.fluidFlag, - xSize, ySize, zSize, - xStrideVelocity, yStrideVelocity, zStrideVelocity, fStrideVelocity, - xStrideFlag, yStrideFlag, zStrideFlag); + internal::sumVelocity<<<g, b, sharedMemorySize, nullptr>>>(resultGPU_, data_velocity, data_flagField, counter, cellVolume, fluidBitNr_, + xSize, ySize, zSize, + xStrideVelocity, yStrideVelocity, zStrideVelocity, fStrideVelocity, + xStrideFlag, yStrideFlag, zStrideFlag); + counter++; } diff --git a/apps/showcases/PorousMediaGPU/utilGPU.h b/apps/showcases/PorousMediaGPU/utilGPU.h index 99bbc9fe5..ad732b0c7 100644 --- a/apps/showcases/PorousMediaGPU/utilGPU.h +++ b/apps/showcases/PorousMediaGPU/utilGPU.h @@ -50,26 +50,30 @@ class BulkVelocityCalculator { public: BulkVelocityCalculator(const std::shared_ptr<StructuredBlockForest> & blocks, const IDs& ids, const Setup& setup, const Config::BlockHandle& forceParameters) : blocks_(blocks), setup_(setup), ids_(ids) { -#if __CUDA_ARCH__ < 600 - WALBERLA_LOG_WARNING_ON_ROOT("Warning atomic add in double precision not supported. Falling back to software implementation") -#endif bool snapshotExists = false; const std::string snapshotFilename = setup_.snapshotBaseFolder + "/" + setup_.fileStem + "DrivingForceSnapshot.file"; - filesystem::path snapshotFilePath( fileName_.c_str() ); + filesystem::path snapshotFilePath( snapshotFilename.c_str() ); if( filesystem::exists( snapshotFilePath ) ) snapshotExists = true; - if(snapshotExists && setup_.loadSnapshot){ WALBERLA_LOG_INFO_ON_ROOT("Intialise PID Controller from Snapshot") forceUpdateFrequency_ = forceParameters.getParameter< uint_t >("forceUpdateFrequency", 0); adaptDrivingForce_ = forceParameters.getParameter< bool >("adaptDrivingForce", false); + writeFrequency_ = forceParameters.getParameter< uint_t >("writeFrequency", 100); + targetBulkVelocity_ = forceParameters.isDefined("targetBulkVelocity") ? forceParameters.getParameter< real_t >("targetBulkVelocity") : setup_.latticeVelocity; readStateFromFile(); + + if(!adaptDrivingForce_){ + drivingForce_ = setup_.drivingForce; + } } else{ forceUpdateFrequency_ = forceParameters.getParameter< uint_t >("forceUpdateFrequency", 0); adaptDrivingForce_ = forceParameters.getParameter< bool >("adaptDrivingForce", false); + writeFrequency_ = forceParameters.getParameter< uint_t >("writeFrequency", 100); + targetBulkVelocity_ = forceParameters.isDefined("targetBulkVelocity") ? forceParameters.getParameter< real_t >("targetBulkVelocity") : setup_.latticeVelocity; bulkVelocity_ = setup_.latticeVelocity; drivingForce_ = setup_.drivingForce; @@ -119,6 +123,8 @@ public: WALBERLA_GPU_CHECK(gpuMalloc((void **)&resultGPU_, sizeof(double) * resultCPU_.size() )); WALBERLA_GPU_CHECK(gpuMemset(resultGPU_, 0.0, sizeof(double) * resultCPU_.size())); + fluidBitNr_ = uint_c(std::log(setup_.fluidFlag) / std::log(2)); + fileName_ = setup_.outputFolder + "/" + setup_.fileStem + "DrivingForce.csv"; filesystem::path drivingForceOuputFile( fileName_.c_str() ); @@ -190,7 +196,20 @@ public: real_t getDrivingForce() { calculateBulkVelocity(); - const real_t error = setup_.latticeVelocity - bulkVelocity_; + const real_t timestep = real_c(executionCounter_ * forceUpdateFrequency_) * setup_.dt; + if(!adaptDrivingForce_){ + if(executionCounter_ % writeFrequency_ == 0){ + WALBERLA_ROOT_SECTION(){ + std::ofstream outfile( fileName_.c_str(), std::ios_base::app ); + outfile << executionCounter_ * forceUpdateFrequency_ << "," << timestep << "," << drivingForce_ << "," << bulkVelocity_ << "\n"; + outfile.close(); + } + } + executionCounter_++; + return drivingForce_; + } + + const real_t error = targetBulkVelocity_ - bulkVelocity_; const real_t d = (error + real_c(3.0) * errorHistory_[0] - real_c(3.0) * errorHistory_[1] - errorHistory_[2]) * (real_c(1.0) / real_c(6.0)); std::rotate(errorHistory_, errorHistory_ + 1, errorHistory_ + sizeof(errorHistory_) / sizeof(real_t)); @@ -208,11 +227,12 @@ public: drivingForce_ = std::min(std::max(minValue, newDrivingForce), maxValue); - const real_t timestep = real_c(executionCounter_ * forceUpdateFrequency_) * setup_.dt; - WALBERLA_ROOT_SECTION(){ - std::ofstream outfile( fileName_.c_str(), std::ios_base::app ); - outfile << executionCounter_ * forceUpdateFrequency_ << "," << timestep << "," << drivingForce_ << "," << bulkVelocity_ << "\n"; - outfile.close(); + if(executionCounter_ % writeFrequency_ == 0){ + WALBERLA_ROOT_SECTION(){ + std::ofstream outfile( fileName_.c_str(), std::ios_base::app ); + outfile << executionCounter_ * forceUpdateFrequency_ << "," << timestep << "," << drivingForce_ << "," << bulkVelocity_ << "\n"; + outfile.close(); + } } executionCounter_++; @@ -225,10 +245,12 @@ private: const IDs ids_; uint_t forceUpdateFrequency_; + uint_t writeFrequency_; bool adaptDrivingForce_; real_t bulkVelocity_; real_t drivingForce_; + real_t targetBulkVelocity_; real_t proportionalGain_; real_t derivateGain_; @@ -245,6 +267,7 @@ private: std::string fileName_; double volume_; + uint_t fluidBitNr_; std::vector<double> resultCPU_; double* resultGPU_; -- GitLab