From 902a0196237a1ab9356bc0f1835fa6f324a75db4 Mon Sep 17 00:00:00 2001 From: Markus Holzer <markus.holzer@onera.fr> Date: Fri, 21 Feb 2025 14:01:14 +0100 Subject: [PATCH] [skip ci] update --- apps/benchmarks/NonUniformGridGPU/LdcSetup.h | 17 +- .../NonUniformGridGPU/NonUniformGridGPU.cpp | 3 +- .../NonUniformGridGPU/NonUniformGridGPU.py | 12 +- .../simulation_setup/benchmark_configs.py | 15 +- apps/showcases/Debug/CMakeLists.txt | 49 ++ apps/showcases/Debug/Debug.cpp | 590 ++++++++++++++++++ apps/showcases/Debug/Debug.py | 126 ++++ apps/showcases/PorousMediaGPU/CMakeLists.txt | 19 +- apps/showcases/PorousMediaGPU/Debug.cpp | 468 ++++++++++++++ apps/showcases/PorousMediaGPU/Debug.py | 127 ++++ .../PorousMediaGPU/DragEvaluation.cpp | 48 +- .../showcases/PorousMediaGPU/GridGeneration.h | 19 +- .../PorousMediaGPU/InitializerFunctions.cpp | 27 +- .../PorousMediaGPU/PorousMediaGPU.cpp | 151 +++-- .../PorousMediaGPU/PorousMediaGPU.py | 39 +- apps/showcases/PorousMediaGPU/Setup.h | 44 +- apps/showcases/PorousMediaGPU/TPMS.prm | 4 +- apps/showcases/PorousMediaGPU/roughWall.prm | 24 +- python/lbmpy_walberla/packing_kernels.py | 6 +- python/lbmpy_walberla/sweep_collection.py | 14 - python/lbmpy_walberla/walberla_lbm_package.py | 25 +- python/pystencils_walberla/jinja_filters.py | 88 ++- .../gpu/BasicRecursiveTimeStepGPU.impl.h | 2 +- .../refinement/RefinementScaling.h | 17 +- 24 files changed, 1753 insertions(+), 181 deletions(-) create mode 100644 apps/showcases/Debug/CMakeLists.txt create mode 100644 apps/showcases/Debug/Debug.cpp create mode 100644 apps/showcases/Debug/Debug.py create mode 100644 apps/showcases/PorousMediaGPU/Debug.cpp create mode 100644 apps/showcases/PorousMediaGPU/Debug.py diff --git a/apps/benchmarks/NonUniformGridGPU/LdcSetup.h b/apps/benchmarks/NonUniformGridGPU/LdcSetup.h index b8431f2f2..49564790f 100644 --- a/apps/benchmarks/NonUniformGridGPU/LdcSetup.h +++ b/apps/benchmarks/NonUniformGridGPU/LdcSetup.h @@ -48,13 +48,18 @@ class LDCRefinement { const AABB & domain = forest.getDomain(); - const AABB leftCorner( 0, domain.yMax() -1, 0, 1, domain.yMax() , domain.zMax() ); - const AABB rightCorner( domain.xMax() - 1, domain.yMax() -1, 0, domain.xMax(), domain.yMax() , domain.zMax() ); + const real_t t = 1.0; + + const AABB minAABB(domain.xMin(), domain.yMin(), domain.zMin(), domain.xMax(), domain.yMin() + t, domain.zMax()); + const AABB maxAABB(domain.xMin(), domain.yMax() - t, domain.zMin(), domain.xMax(), domain.yMax(), domain.zMax()); + + // const AABB leftCorner( 0, domain.yMax() -1, 0, 1, domain.yMax() , domain.zMax() ); + // const AABB rightCorner( domain.xMax() - 1, domain.yMax() -1, 0, domain.xMax(), domain.yMax() , domain.zMax() ); for(auto & block : forest) { auto & aabb = block.getAABB(); - if( leftCorner.intersects( aabb ) || rightCorner.intersects( aabb ) ) + if( minAABB.intersects( aabb ) || maxAABB.intersects( aabb ) ) { if( block.getLevel() < refinementDepth_) block.setMarker( true ); @@ -88,14 +93,14 @@ class LDC const uint_t level = b.getLevel(); auto flagField = b.getData< FlagField_T >(flagFieldID); const uint8_t noslipFlag = flagField->registerFlag(noSlipFlagUID_); - const uint8_t ubbFlag = flagField->registerFlag(ubbFlagUID_); + // const uint8_t ubbFlag = flagField->registerFlag(ubbFlagUID_); for (auto cIt = flagField->beginWithGhostLayerXYZ(2); cIt != flagField->end(); ++cIt) { const Cell localCell = cIt.cell(); Cell globalCell(localCell); sbfs.transformBlockLocalToGlobalCell(globalCell, b); - if (globalCell.y() >= cell_idx_c(sbfs.getNumberOfYCells(level))) { flagField->addFlag(localCell, ubbFlag); } - else if (globalCell.y() < 0 || globalCell.x() < 0 || globalCell.x() >= cell_idx_c(sbfs.getNumberOfXCells(level))) + if (globalCell.y() >= cell_idx_c(sbfs.getNumberOfYCells(level))) { flagField->addFlag(localCell, noslipFlag); } + else if (globalCell.y() < 0) { flagField->addFlag(localCell, noslipFlag); } diff --git a/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.cpp b/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.cpp index 818f612b9..b824434af 100644 --- a/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.cpp +++ b/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.cpp @@ -98,6 +98,7 @@ int main(int argc, char** argv) // Reading parameters auto parameters = config->getOneBlock("Parameters"); const real_t omega = parameters.getParameter< real_t >("omega", real_c(1.95)); + const real_t fx = parameters.getParameter< real_t >("force", real_c(1e-6)); const uint_t timesteps = parameters.getParameter< uint_t >("timesteps", uint_c(50)); const bool gpuEnabledMPI = parameters.getParameter< bool >("gpuEnabledMPI", false); @@ -145,7 +146,7 @@ int main(int argc, char** argv) Cell(parameters.getParameter< Vector3< cell_idx_t > >("innerOuterSplit", Vector3< cell_idx_t >(1, 1, 1))); Vector3< int32_t > gpuBlockSize = parameters.getParameter< Vector3< int32_t > >("gpuBlockSize", Vector3< int32_t >(256, 1, 1)); - SweepCollection_T sweepCollection(blocks, pdfFieldGpuID, densityFieldGpuID, velFieldGpuID, gpuBlockSize[0], + SweepCollection_T sweepCollection(blocks, pdfFieldGpuID, densityFieldGpuID, velFieldGpuID, fx, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2], omega, innerOuterSplit); for (auto& iBlock : *blocks){ diff --git a/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.py b/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.py index a1f5bff23..ebe42a676 100644 --- a/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.py +++ b/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.py @@ -11,6 +11,7 @@ from lbmpy import LBMConfig, LBMOptimisation, Stencil, Method, LBStencil, Subgri from pystencils_walberla import CodeGeneration, generate_info_header from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator +from lbmpy_walberla import RefinementScaling omega = sp.symbols("omega") omega_free = sp.Symbol("omega_free") @@ -36,8 +37,8 @@ with CodeGeneration() as ctx: streaming_pattern = 'esopull' timesteps = get_timesteps(streaming_pattern) - stencil = LBStencil(Stencil.D3Q19) - method_enum = Method.CUMULANT + stencil = LBStencil(Stencil.D3Q27) + method_enum = Method.SRT fourth_order_correction = 0.01 if method_enum == Method.CUMULANT and stencil.Q == 27 else False collision_setup = "cumulant-K17" if fourth_order_correction else method_enum.name.lower() @@ -47,7 +48,9 @@ with CodeGeneration() as ctx: density_field, velocity_field = ps.fields(f"density, velocity(3) : {field_type}[3D]", layout='fzyx') macroscopic_fields = {'density': density_field, 'velocity': velocity_field} + fx = sp.Symbol("Fx") lbm_config = LBMConfig(stencil=stencil, method=method_enum, relaxation_rate=omega, compressible=True, + force=tuple([fx, 0.0, 0.0]), fourth_order_correction=fourth_order_correction, streaming_pattern=streaming_pattern) lbm_opt = LBMOptimisation(cse_global=False, field_layout='fzyx') @@ -60,10 +63,13 @@ with CodeGeneration() as ctx: ubb = lbm_boundary_generator(class_name='UBB', flag_uid='UBB', boundary_object=UBB([0.05, 0, 0], data_type=field_type)) + refinement_scaling = RefinementScaling() + refinement_scaling.add_standard_relaxation_rate_scaling(omega) + refinement_scaling.add_force_scaling(fx) generate_lbm_package(ctx, name="NonUniformGridGPU", collision_rule=collision_rule, lbm_config=lbm_config, lbm_optimisation=lbm_opt, - nonuniform=True, boundaries=[no_slip, ubb], + nonuniform=True, refinement_scaling=refinement_scaling, boundaries=[no_slip, ubb], macroscopic_fields=macroscopic_fields, target=ps.Target.GPU, gpu_indexing_params=gpu_indexing_params, max_threads=max_threads) diff --git a/apps/benchmarks/NonUniformGridGPU/simulation_setup/benchmark_configs.py b/apps/benchmarks/NonUniformGridGPU/simulation_setup/benchmark_configs.py index 6f7b6820a..f876ae86f 100644 --- a/apps/benchmarks/NonUniformGridGPU/simulation_setup/benchmark_configs.py +++ b/apps/benchmarks/NonUniformGridGPU/simulation_setup/benchmark_configs.py @@ -40,7 +40,7 @@ class Scenario: self.domain_size = domain_size self.root_blocks = root_blocks self.cells_per_block = cells_per_block - self.periodic = (0, 0, 1) + self.periodic = (1, 0, 1) self.refinement_depth = refinement_depth self.num_processes = num_processes @@ -72,16 +72,17 @@ class Scenario: 'blockForestFilestem': self.bfs_filestem, 'writeVtk': self.write_setup_vtk, 'outputStatistics': True, - 'writeSetupForestAndReturn': True, + 'writeSetupForestAndReturn': False, }, 'Parameters': { 'omega': 1.95, + 'force': 1e-6, 'timesteps': self.timesteps, 'remainingTimeLoggerFrequency': self.logger_frequency, 'vtkWriteFrequency': self.vtk_write_frequency, - 'useVTKAMRWriter': True, + 'useVTKAMRWriter': False, 'oneFilePerProcess': False, - 'writeOnlySlice': False, + 'writeOnlySlice': True, 'gpuEnabledMPI': self.gpu_enabled_mpi, 'gpuBlockSize': (128, 1, 1), }, @@ -144,9 +145,9 @@ def validation_run(): scenario = Scenario(domain_size=domain_size, root_blocks=root_blocks, cells_per_block=cells_per_block, - timesteps=0, - vtk_write_frequency=0, - refinement_depth=3, + timesteps=1001, + vtk_write_frequency=1000, + refinement_depth=1, gpu_enabled_mpi=False) scenarios.add(scenario) diff --git a/apps/showcases/Debug/CMakeLists.txt b/apps/showcases/Debug/CMakeLists.txt new file mode 100644 index 000000000..31af001f3 --- /dev/null +++ b/apps/showcases/Debug/CMakeLists.txt @@ -0,0 +1,49 @@ +waLBerla_link_files_to_builddir( *.py ) +waLBerla_link_files_to_builddir( *.prm ) +waLBerla_link_files_to_builddir( *.png ) +waLBerla_link_files_to_builddir( *.obj ) +waLBerla_link_files_to_builddir( *.stl ) +waLBerla_link_files_to_builddir( *.mtl ) + +waLBerla_generate_target_from_python(NAME PorousMediaGPUCodeGen + FILE PorousMediaGPU.py + OUT_FILES PorousMediaGPUSweepCollection.h PorousMediaGPUSweepCollection.${CODEGEN_FILE_SUFFIX} + PorousMediaGPUStorageSpecification.h PorousMediaGPUStorageSpecification.${CODEGEN_FILE_SUFFIX} + NoSlip.h NoSlip.${CODEGEN_FILE_SUFFIX} + Obstacle.h Obstacle.${CODEGEN_FILE_SUFFIX} + UBB.h UBB.${CODEGEN_FILE_SUFFIX} + TurbulentInflow.h TurbulentInflow.${CODEGEN_FILE_SUFFIX} + Outflow.h Outflow.${CODEGEN_FILE_SUFFIX} + Welford.h Welford.${CODEGEN_FILE_SUFFIX} + PorousMediaGPUBoundaryCollection.h + PorousMediaGPUInfoHeader.h + PorousMediaGPUStaticDefines.h) + +if (WALBERLA_BUILD_WITH_CUDA OR WALBERLA_BUILD_WITH_HIP) + waLBerla_add_executable ( NAME PorousMediaGPU + FILES PorousMediaGPU.cpp GridGeneration.h Setup.h Types.h DragEvaluation.cpp InitializerFunctions.cpp TPMS.cpp Inflow.cpp Probe.cpp util.cpp utilGPU.cu + DEPENDS blockforest boundary core field gpu io lbm_generated postprocessing stencil timeloop vtk PorousMediaGPUCodeGen ) +else() + waLBerla_add_executable ( NAME PorousMediaGPU + FILES PorousMediaGPU.cpp GridGeneration.h Setup.h Types.h DragEvaluation.cpp InitializerFunctions.cpp TPMS.cpp Inflow.cpp Probe.cpp util.cpp + DEPENDS blockforest boundary core field io lbm_generated postprocessing stencil timeloop vtk PorousMediaGPUCodeGen ) + +endif(WALBERLA_BUILD_WITH_CUDA OR WALBERLA_BUILD_WITH_HIP) + +waLBerla_generate_target_from_python(NAME DebugCodeGen + FILE Debug.py + OUT_FILES DebugSweepCollection.h DebugSweepCollection.cpp + DebugStorageSpecification.h DebugStorageSpecification.cpp + NoSlip.h NoSlip.cpp + Obstacle.h Obstacle.cpp + UBB.h UBB.cpp + TurbulentInflow.h TurbulentInflow.cpp + Outflow.h Outflow.cpp + Welford.h Welford.cpp + DebugBoundaryCollection.h + DebugInfoHeader.h + DebugStaticDefines.h) + +waLBerla_add_executable ( NAME Debug + FILES Debug.cpp GridGeneration.h Setup.h Types.h DragEvaluation.cpp InitializerFunctions.cpp TPMS.cpp Inflow.cpp Probe.cpp util.cpp + DEPENDS blockforest boundary core field io lbm_generated postprocessing stencil timeloop vtk DebugCodeGen ) diff --git a/apps/showcases/Debug/Debug.cpp b/apps/showcases/Debug/Debug.cpp new file mode 100644 index 000000000..c7b8a949f --- /dev/null +++ b/apps/showcases/Debug/Debug.cpp @@ -0,0 +1,590 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file PorousMediaGPU.cpp +//! \author Markus Holzer <markus.holzer@fau.de> +// +//====================================================================================================================== + +#include "blockforest/Initialization.h" +#include "blockforest/StructuredBlockForest.h" +#include "blockforest/communication/UniformBufferedScheme.h" + +#include "core/Abort.h" +#include "core/DataTypes.h" +#include "core/SharedFunctor.h" +#include "core/debug/CheckFunctions.h" +#include "core/logging/Logging.h" +#include "core/math/Vector3.h" +#include "core/math/Constants.h" +#include "core/Environment.h" +#include "core/mpi/MPIManager.h" +#include "core/MemoryUsage.h" +#include "core/mpi/Reduce.h" +#include "core/timing/RemainingTimeLogger.h" +#include "core/timing/TimingPool.h" + +#include "field/AddToStorage.h" +#include "field/CellCounter.h" +#include "field/FlagField.h" +#include "field/StabilityChecker.h" +#include "field/adaptors/AdaptorCreators.h" +#include "field/iterators/FieldIterator.h" +#include "field/vtk/VTKWriter.h" +#include "field/vtk/FlagFieldCellFilter.h" + +#include "geometry/InitBoundaryHandling.h" +#include "geometry/mesh/TriangleMeshIO.h" + +#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT) +# include "gpu/AddGPUFieldToStorage.h" +# include "gpu/DeviceSelectMPI.h" +# include "gpu/ErrorChecking.h" +# include "gpu/FieldCopy.h" +# include "gpu/HostFieldAllocator.h" +# include "gpu/ParallelStreams.h" +# include "gpu/communication/NonUniformGPUScheme.h" +#endif + +#include "lbm_generated/communication/UniformGeneratedPdfPackInfo.h" +#include "lbm_generated/evaluation/PerformanceEvaluation.h" +#include "lbm_generated/field/AddToStorage.h" +#include "lbm_generated/field/PdfField.h" + +#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT) +# include "lbm_generated/gpu/AddToStorage.h" +# include "lbm_generated/gpu/GPUPdfField.h" +# include "lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.h" +# include "lbm_generated/gpu/BasicRecursiveTimeStepGPU.h" +#endif + +#include "postprocessing/FieldToSurfaceMesh.h" + +#include "timeloop/SweepTimeloop.h" +#include "vtk/VTKOutput.h" + +#include <cstdlib> +#include <iostream> +#include <memory> + +#include "DragEvaluation.h" +#include "Inflow.h" +#include "InitializerFunctions.h" +#include "GridGeneration.h" +#include "Setup.h" +#include "Probe.h" +#include "TPMS.h" +#include "Types.h" +#include "util.h" + +#include "PorousMediaGPUStaticDefines.h" + +using namespace walberla; +using macroFieldType = VelocityField_T::value_type; + +#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT) +using GPUPdfField_T = lbm_generated::GPUPdfField< StorageSpecification_T >; +using gpu::communication::NonUniformGPUScheme; +using lbm_generated::NonuniformGeneratedGPUPdfPackInfo; +#else +using PdfField_T = lbm_generated::PdfField< StorageSpecification_T >; +using blockforest::communication::UniformBufferedScheme; + +using lbm_generated::UniformGeneratedPdfPackInfo; +#endif + +inline Vector3< uint_t > inflowSliceSize( const shared_ptr< StructuredBlockStorage > & blocks, IBlock * const block ){ + return Vector3<uint_t>( uint_c(1), blocks->getNumberOfYCells( *block ), blocks->getNumberOfZCells( *block ) ); +} + +class HemisphereWallDistance +{ + public: + HemisphereWallDistance(const real_t radius, const real_t hemispheresDistance) : radius_(radius), hemispheresDistance_(hemispheresDistance){} + + real_t operator()(const Cell& fluidCell, const Cell& boundaryCell, + const shared_ptr< StructuredBlockForest >& SbF, IBlock& block){ + + auto domain = SbF->getDomain(); + const real_t yMin = domain.yMin(); + const real_t yMax = domain.yMax(); + + Vector3< real_t > boundary = SbF->getBlockLocalCellCenter( block, boundaryCell ); + Vector3< real_t > fluid = SbF->getBlockLocalCellCenter( block, fluidCell ); + + auto center = getCenterOfHemisphere(boundary, radius_, hemispheresDistance_, yMin, yMax); + + hemispheresContains(boundary, center, radius_); + + WALBERLA_CHECK(!hemispheresContains(fluid, center, radius_), "fluid cell is in contained in Hemisphere (" << fluid[0] << ", " << fluid[1] << ", " << fluid[2] << "). The block local cell is " << fluidCell) + WALBERLA_CHECK(hemispheresContains(boundary, center, radius_), "boundary cell is not in contained in Hemisphere (" << boundary[0] << ", " << boundary[1] << ", " << boundary[2] << "). The block local cell is " << boundaryCell) + + return delta(fluid, boundary, center); + } + + real_t delta(const Vector3< real_t >& fluid, const Vector3< real_t >& boundary, const Vector3< real_t >& center) const + { + // http://devmag.org.za/2009/04/17/basic-collision-detection-in-2d-part-2/ + const Vector3< real_t > f = fluid - center; + const Vector3< real_t > d = (boundary - center) - f; + + const real_t dDotd = d[0] * d[0] + d[1] * d[1] + d[2] * d[2]; + const real_t fDotf = f[0] * f[0] + f[1] * f[1] + f[2] * f[2]; + const real_t dDotf = d[0] * f[0] + d[1] * f[1] + d[2] * f[2]; + + const real_t b = real_c(2.0) * dDotf; + const real_t c = fDotf - (radius_ * radius_); + + const real_t bb4ac = b * b - (real_c(4.0) * dDotd * c); + WALBERLA_CHECK_GREATER_EQUAL(bb4ac, real_c(0.0)) + + const real_t sqrtbb4ac = std::sqrt(bb4ac); + const real_t alpha = std::min((-b + sqrtbb4ac) / (real_c(2.0) * dDotd), (-b - sqrtbb4ac) / (real_c(2.0) * dDotd)); + + WALBERLA_CHECK_GREATER_EQUAL(alpha, real_c(0.0)) + WALBERLA_CHECK_LESS_EQUAL(alpha, real_c(1.0)) + + return alpha; + } + + +private: + const real_t radius_; + const real_t hemispheresDistance_; +}; + +real_t defaultWallDistance(const Cell& /*fluidCell*/, const Cell& /*boundaryCell*/, const shared_ptr< StructuredBlockForest >& /*SbF*/, IBlock& /*block*/){ + return real_c(0.5); +} + +void refineWalls(SetupBlockForest& forest, const real_t hemispheresRadius, const real_t dx, const uint_t level) +{ + auto domain = forest.getDomain(); + const real_t yMin = domain.yMin(); + const real_t yMax = domain.yMax(); + + const real_t t = hemispheresRadius + dx; + + const AABB minAABB(domain.xMin(), domain.yMin(), domain.zMin(), domain.xMax(), domain.yMin() + t, domain.zMax()); + // const AABB maxAABB(domain.xMin(), domain.yMax() - t, domain.zMin(), domain.xMax(), domain.yMax(), domain.zMax()); + + for (auto block = forest.begin(); block != forest.end(); ++block){ + const AABB& aabb = block->getAABB(); + + // if (block->getLevel() < level && (minAABB.intersects(aabb) || maxAABB.intersects(aabb)) ){ + if (block->getLevel() < level && minAABB.intersects(aabb) ){ + block->setMarker(true); + } + } +} + + + +int main(int argc, char** argv){ + Environment env( argc, argv ); + mpi::MPIManager::instance()->useWorldComm(); +#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT) + gpu::selectDeviceBasedOnMpiRank(); +#endif + + auto config = env.config(); + + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /// SETUP AND CONFIGURATION /// + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + auto parameters = config->getOneBlock("Parameters"); + auto loggingParameters = config->getOneBlock("Logging"); + auto boundariesConfig = config->getBlock("Boundaries"); + + Setup setup(config, infoMap); + WALBERLA_LOG_INFO_ON_ROOT( "Simulation run data:" + "\n- simulation parameters:" << std::setprecision(16) << + "\n + collision model: " << setup.collisionModel << + "\n + stencil: " << setup.stencil << + "\n + streaming: " << setup.streamingPattern << + "\n + compressible: " << ( StorageSpecification_T::compressible ? "yes" : "no" ) << + "\n- simulation properties: " + "\n + kin. viscosity: " << setup.kinViscosity << " [m^2/s] (on the coarsest grid)" << + "\n + viscosity LB: " << setup.viscosity << " [dx*dx/dt] (on the coarsest grid)" << + "\n + omega: " << setup.omega << " (on the coarsest grid)" << + "\n + omega: " << setup.omegaF << " (on the finest grid)" << + "\n + inflow velocity: " << setup.referenceVelocity << " [m/s]" << + "\n + lattice velocity: " << setup.latticeVelocity << " [dx/dt]" << + "\n + Reynolds number: " << setup.reynoldsNumber << + "\n + Mach number: " << setup.machNumber << + "\n + driving force: " << setup.drivingForce << + "\n + dx (coarsest grid): " << setup.dxC << " [m]" << + "\n + dx (finest grid): " << setup.dxF << " [m]" << + "\n + dt (coarsest grid): " << setup.dt << " [s]" << + "\n + conversion Faktor: " << setup.conversionFactor << + "\n + #time steps: " << setup.timesteps << " (on the coarsest grid, " << ( real_c(1.0) / setup.dt ) << " for 1s of real time)" << + "\n + simulation time: " << ( real_c( setup.timesteps ) * setup.dt ) << " [s]" ) + + bool writeSetupForestAndReturn = loggingParameters.getParameter< bool >("writeSetupForestAndReturn", false); + if (uint_c(MPIManager::instance()->numProcesses()) > 1) writeSetupForestAndReturn = false; + shared_ptr< BlockForest > bfs; + + auto refinementSelection = std::bind(refineWalls, _1, real_c(1.0), setup.dxF, setup.refinementLevels); + + createBlockForest(bfs, setup, refinementSelection); + + if (writeSetupForestAndReturn && mpi::MPIManager::instance()->numProcesses() == 1){ + WALBERLA_LOG_INFO_ON_ROOT("BlockForest has been created and writen to file. Returning program") + logging::Logging::printFooterOnStream(); + return EXIT_SUCCESS; + } + + auto blocks = std::make_shared< StructuredBlockForest >(bfs, setup.cellsPerBlock[0], setup.cellsPerBlock[1], setup.cellsPerBlock[2]); + blocks->createCellBoundingBoxes(); + auto finalDomain = blocks->getDomain(); + + const StorageSpecification_T StorageSpec = StorageSpecification_T(); + // Creating fields + uint_t step = 0; + WALBERLA_LOG_INFO_ON_ROOT(++step << ". Allocating data") + IDs ids; + ids.pdfField = lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, uint_c(1), field::fzyx); + + auto allocator = make_shared< gpu::HostFieldAllocator<macroFieldType> >(); // use pinned memory allocator for faster CPU-GPU memory transfers + ids.velocityField = field::addToStorage< VelocityField_T >(blocks, "vel", macroFieldType(0.0), field::fzyx, uint_c(1), allocator); + ids.meanVelocityField = field::addToStorage< VelocityField_T >(blocks, "mean velocity", macroFieldType(0.0), field::fzyx, uint_c(1), allocator); + ids.densityField = field::addToStorage< ScalarField_T >(blocks, "density", macroFieldType(1.0), field::fzyx, uint_c(1), allocator); + ids.flagField = field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field"); + + ids.pdfFieldGPU = lbm_generated::addGPUPdfFieldToStorage< PdfField_T >(blocks, ids.pdfField, StorageSpec, "pdfs on GPU", true); + ids.velocityFieldGPU = gpu::addGPUFieldToStorage< VelocityField_T >(blocks, ids.velocityField, "velocity on GPU", true); + ids.meanVelocityFieldGPU = gpu::addGPUFieldToStorage< VelocityField_T >(blocks, ids.meanVelocityField, "mean velocity on GPU", true); + ids.densityFieldGPU = gpu::addGPUFieldToStorage< ScalarField_T >(blocks, ids.densityField, "velocity on GPU", true); + + + std::function< Vector3< uint_t > (const shared_ptr< StructuredBlockStorage >&, IBlock * const) > getInflowSliceSize = inflowSliceSize; + ids.velocityFieldInflowSlice = field::addToStorage< VelocityField_T >(blocks, "velocity Inflow", getInflowSliceSize, macroFieldType(0.0), field::fzyx, uint_c(1)); + ids.velocityFieldInflowSliceGPU = gpu::addGPUFieldToStorage< VelocityField_T >(blocks, ids.velocityFieldInflowSlice, "velocity Inflow on GPU", true); + + const Cell innerOuterSplit = Cell(parameters.getParameter< Vector3<cell_idx_t> >("innerOuterSplit", Vector3<cell_idx_t>(1, 1, 1))); + SweepCollection_T sweepCollection(blocks, ids.pdfFieldGPU, ids.densityFieldGPU, ids.velocityFieldGPU, setup.drivingForce, setup.omega, innerOuterSplit); + WelfordSweep_T welfordSweep(ids.meanVelocityFieldGPU, ids.velocityFieldGPU, real_c(0.0)); + + WALBERLA_LOG_INFO_ON_ROOT(++step << ". Initialising data") + for (auto& block : *blocks){ + sweepCollection.initialise(&block); + + auto velocityInflow = block.getData< VelocityField_T >(ids.velocityFieldInflowSlice); + WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(velocityInflow, + velocityInflow->get(x, y, z, 0) = setup.latticeVelocity; + ) + } + sweepCollection.initialiseBlockPointer(); + gpu::fieldCpy< gpu::GPUField< VelocityField_T::value_type >, VelocityField_T >(blocks, ids.velocityFieldInflowSliceGPU, ids.velocityFieldInflowSlice); + + auto inflow = std::make_shared<Inflow>(blocks, setup, ids); + auto probe = std::make_shared<Probe>(blocks, setup, ids, config->getOneBlock("Probes")); + + const real_t lenght = 8.0; + const real_t res = 3.0; + const real_t start = 0.0; + +/* + const uint_t nx = uint_c(std::floor(lenght / (setup.dxC * res))); + for(uint_t i = 0; i < nx; ++i){ + probe->addPoint(Vector3<real_t>(start + real_c(i) * (setup.dxC * res), 0.5, 0.5 )); + } +*/ + probe->initialise(); + + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /// LB SWEEPS AND BOUNDARY HANDLING /// + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + // Boundaries + WALBERLA_LOG_INFO_ON_ROOT(++step << ". Setting boundary conditions") + for (auto& block : *blocks){ + auto flagField = block.getData< FlagField_T >(ids.flagField); + setup.inflowFlag = flagField->registerFlag(setup.inflowUID); + setup.turbulentInflowFlag = flagField->registerFlag(setup.turbulentInflowUID); + setup.outflowFlag = flagField->registerFlag(setup.outflowUID); + setup.obstacleFlag = flagField->registerFlag(setup.obstacleUID); + setup.noSlipFlag = flagField->registerFlag(setup.noSlipUID); + setup.fluidFlag = flagField->registerFlag(setup.fluidUID); + } + + + geometry::initBoundaryHandling< FlagField_T >(*blocks, ids.flagField, boundariesConfig); + std::function< VelocityField_T::value_type (const Cell&, const Cell&, const shared_ptr< StructuredBlockForest >&, IBlock&) > wallDistanceFunctor = defaultWallDistance; + if(setup.useGrid){ + WALBERLA_LOG_INFO_ON_ROOT(++step << ". Setting Grid after inflow to produce turbulence structure") + initGrid(blocks, ids.flagField, setup); + } + +/* + if(config->getNumBlocks("RoughWall") > 0){ + WALBERLA_LOG_INFO_ON_ROOT(++step << ". Using RoughWall setup") + auto wallParameters = config->getOneBlock("RoughWall"); + const real_t radius = wallParameters.getParameter< real_t >("hemispheresRadius"); + const real_t hemispheresDistance = wallParameters.getParameter< real_t >("hemispheresDistance"); + initHemisphers(blocks, ids.flagField, setup, config->getOneBlock("RoughWall")); + HemisphereWallDistance wallDist(radius, hemispheresDistance); + wallDistanceFunctor = wallDist; + } +*/ + if(config->getNumBlocks("TPMS") > 0){ + WALBERLA_LOG_INFO_ON_ROOT(++step << ". Placing TPMS material") + auto tpms = std::make_shared<TPMS>(config->getOneBlock("TPMS"), setup); + tpms->setupBoundary(blocks, ids.flagField); + } + + + geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, ids.flagField, setup.fluidUID); + // BoundaryCollection_T boundaryCollection(blocks, ids.flagField, ids.pdfFieldGPU, setup.fluidUID, ids.velocityFieldInflowSliceGPU, setup.drivingForce); + BoundaryCollection_T boundaryCollection(blocks, ids.flagField, ids.pdfFieldGPU, setup.fluidUID, ids.velocityFieldInflowSliceGPU, setup.latticeVelocity, wallDistanceFunctor, ids.pdfField); + // BoundaryCollection_T boundaryCollection(blocks, ids.flagField, ids.pdfFieldGPU, setup.fluidUID, ids.pdfField); + + if(config->getNumBlocks("TPMS") > 0 && loggingParameters.getParameter< bool >("writeSurfaceMeshOfTPMS", false) ){ + WALBERLA_LOG_INFO_ON_ROOT(++step << ". Writing TPMS as surface file") + auto mesh = postprocessing::flagFieldToSurfaceMesh< FlagField_T >(blocks, ids.flagField, setup.obstacleFlag, true); + WALBERLA_EXCLUSIVE_WORLD_SECTION(0){ + geometry::writeMesh("TPMS.obj", *mesh); + } + } + + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /// COMMUNICATION SCHEME /// + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + const bool gpuEnabledMPI = parameters.getParameter< bool >("gpuEnabledMPI", false); + auto nonUniformCommunication = std::make_shared< NonUniformGPUScheme< CommunicationStencil_T > >(blocks, gpuEnabledMPI); + auto nonUniformPackInfo = lbm_generated::setupNonuniformGPUPdfCommunication< GPUPdfField_T >(blocks, ids.pdfFieldGPU); + nonUniformCommunication->addPackInfo(nonUniformPackInfo); + + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /// TIME STEP DEFINITIONS /// + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + const int streamLowPriority = 0; + auto defaultStream = gpu::StreamRAII::newPriorityStream(streamLowPriority); + SweepTimeloop timeloop(blocks->getBlockStorage(), setup.timesteps); + + timeloop.addFuncBeforeTimeStep(inflow->updateGPUFunctor(), "Update inflow velocity"); + timeloop.addFuncBeforeTimeStep(probe->updateFunctor(), "Update Probe Data"); + + lbm_generated::BasicRecursiveTimeStepGPU< GPUPdfField_T, SweepCollection_T, BoundaryCollection_T > + LBMMeshRefinement(blocks, ids.pdfFieldGPU, sweepCollection, boundaryCollection, nonUniformCommunication, nonUniformPackInfo); + + // timeloop.addFuncBeforeTimeStep(LBMMeshRefinement, "Refinement timestep"); + LBMMeshRefinement.addRefinementToTimeLoop(timeloop, uint_c(0)); + + shared_ptr< DragEvaluation > evaluation; + if(config->getNumBlocks("RoughWall") > 0){ + WALBERLA_LOG_INFO_ON_ROOT(++step << ". Setup Drag Evaluation") + evaluation = make_shared<DragEvaluation>(blocks, boundaryCollection, ids, setup, config); + LBMMeshRefinement.addPostBoundaryHandlingBlockFunction(evaluation->forceCalculationFunctor()); + + } + + + const AABB sliceXY(finalDomain.xMin(), finalDomain.yMin(), finalDomain.center()[2] - setup.dxF, + finalDomain.xMax(), finalDomain.yMax(), finalDomain.center()[2] + setup.dxF); + +/* + CellInterval globalCellInterval; + blocks->getCellBBFromAABB( globalCellInterval, sliceXY, uint_c(0) ); + + std::map<IBlock*, CellInterval> sliceCellIntervals; + + for (auto& block : *blocks){ + CellInterval cellBB; + blocks->getCellBBFromAABB( cellBB, sliceXY, blocks->getLevel(block) ); + blocks->transformGlobalToBlockLocalCellInterval( cellBB, block ); + + Cell maxCell(cell_idx_c(0), cell_idx_c(0), cell_idx_c(0)); + Cell minCell(cell_idx_c( blocks->getNumberOfXCells( block ) ), cell_idx_c( blocks->getNumberOfYCells( block ) ), cell_idx_c( blocks->getNumberOfZCells( block ) )); + + for( cell_idx_t z = cell_idx_c(0); z != cell_idx_c( blocks->getNumberOfZCells( block ) ); ++z ){ + for( cell_idx_t y = cell_idx_c(0); y != cell_idx_c( blocks->getNumberOfYCells( block ) ); ++y ){ + for( cell_idx_t x = cell_idx_c(0); x != cell_idx_c( blocks->getNumberOfXCells( block ) ); ++x ){ + if( cellBB.contains(x,y,z) ){ + if(Cell(x, y, z) < minCell){ + minCell = Cell(x, y, z); + } + if(!(Cell(x, y, z) < maxCell)){ + maxCell = Cell(x, y, z); + } + } + } + } + } + CellInterval localCellInterval(minCell, maxCell); + if(!localCellInterval.empty()){ + sliceCellIntervals[&block] = localCellInterval; + } + } + + timeloop.add() << BeforeFunction([&]() + { + for (auto const& sliceCellInterval : sliceCellIntervals){ + sweepCollection.calculateMacroscopicParametersCellInterval(sliceCellInterval.first, sliceCellInterval.second); + } + + const uint_t velCtr = uint_c(welfordSweep.getCounter()); + welfordSweep.setCounter(welfordSweep.getCounter() + real_c(1.0)); + }) << Sweep(welfordSweep.getSweepOnCellInterval(blocks, globalCellInterval), "welford sweep"); + +*/ + + /*SweepTimeloop timeloop(blocks->getBlockStorage(), setup.timesteps); + timeloop.add() << BeforeFunction(communication.getCommunicateFunctor(), "communication") + << Sweep(boundaryCollection.getSweep(BoundaryCollection_T::ALL, defaultStream), "Boundary Conditions"); + timeloop.add() << Sweep(sweepCollection.streamCollide(SweepCollection_T::ALL, defaultStream), "LBM StreamCollide");*/ + + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /// VTK Output /// + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + // VTK + auto VTKWriter = config->getOneBlock("VTKWriter"); + const uint_t vtkWriteFrequency = VTKWriter.getParameter< uint_t >("vtkWriteFrequency", 0); + const uint_t vtkGhostLayers = VTKWriter.getParameter< uint_t >("vtkGhostLayers", 0); + const bool amrFileFormat = VTKWriter.getParameter< bool >("amrFileFormat", false); + const bool oneFilePerProcess = VTKWriter.getParameter< bool >("oneFilePerProcess", false); + const real_t samplingResolution = VTKWriter.getParameter< real_t >("samplingResolution", real_c(-1.0)); + const uint_t initialWriteCallsToSkip = VTKWriter.getParameter< uint_t >("initialWriteCallsToSkip", uint_c(0.0)); + const real_t sliceThickness = VTKWriter.getParameter< real_t >("sliceThickness", real_c(1.0)); + + if (vtkWriteFrequency > 0){ + const std::string vtkFolder = "VTKPorousMediaRE_" + std::to_string(uint64_c(setup.reynoldsNumber)); + auto vtkOutput = + vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, vtkGhostLayers, false, vtkFolder, + "simulation_step", false, true, true, false, 0, amrFileFormat, oneFilePerProcess); + + vtkOutput->setInitialWriteCallsToSkip(initialWriteCallsToSkip); + + vtkOutput->addBeforeFunction([&]() { + for (auto& block : *blocks){ + sweepCollection.calculateMacroscopicParameters(&block); + } + + + // for (auto const& sliceCellInterval : sliceCellIntervals){ + // sweepCollection.calculateMacroscopicParametersCellInterval(sliceCellInterval.first, sliceCellInterval.second); + // } + +#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT) + gpu::fieldCpy< VelocityField_T, gpu::GPUField< VelocityField_T::value_type > >(blocks, ids.velocityField, ids.velocityFieldGPU); + // gpu::fieldCpy< VelocityField_T, gpu::GPUField< VelocityField_T::value_type > >(blocks, ids.meanVelocityField, ids.meanVelocityFieldGPU); + // gpu::fieldCpy< ScalarField_T, gpu::GPUField< ScalarField_T::value_type > >(blocks, ids.densityField, ids.densityFieldGPU); +#endif + }); + + vtkOutput->setSamplingResolution(samplingResolution); + + field::FlagFieldCellFilter<FlagField_T> fluidFilter( ids.flagField ); + fluidFilter.addFlag( setup.obstacleUID ); + vtkOutput->addCellExclusionFilter(fluidFilter); + + + if (VTKWriter.getParameter< bool >("writeOnlySlice", true)){ + // const AABB sliceXY(finalDomain.xMin(), finalDomain.yMin(), finalDomain.center()[2] - sliceThickness * setup.dxF, + // finalDomain.xMax(), finalDomain.yMax(), finalDomain.center()[2] + sliceThickness * setup.dxF); + vtkOutput->addCellInclusionFilter(vtk::AABBCellFilter(sliceXY)); + + if (VTKWriter.getParameter< bool >("writeXZSlice", false)){ + const AABB sliceXZ(finalDomain.xMin(), finalDomain.center()[1] - setup.dxF, finalDomain.zMin(), + finalDomain.xMax(), finalDomain.center()[1] + setup.dxF, finalDomain.zMax()); + vtkOutput->addCellInclusionFilter(vtk::AABBCellFilter(sliceXZ)); + } + } + + if (VTKWriter.getParameter< bool >("velocity")){ + if (VTKWriter.getParameter< bool >("writeVelocityAsMagnitude")){ + auto velWriter = make_shared< field::VTKMagnitudeWriter< VelocityField_T, float32 > >(ids.velocityField, "velocityMagnitude"); + vtkOutput->addCellDataWriter(velWriter); + } + else{ + auto velWriter = make_shared< field::VTKWriter< VelocityField_T, float32 > >(ids.velocityField, "velocity"); + vtkOutput->addCellDataWriter(velWriter); + } + } + if (VTKWriter.getParameter< bool >("meanVelocity", false)){ + auto meanVelWriter = make_shared< field::VTKWriter< VelocityField_T, float32 > >(ids.meanVelocityField, "meanVelocity"); + vtkOutput->addCellDataWriter(meanVelWriter); + + } + if (VTKWriter.getParameter< bool >("density")){ + auto densityWriter = make_shared< field::VTKWriter< ScalarField_T, float32 > >(ids.densityField, "density"); + vtkOutput->addCellDataWriter(densityWriter); + } + if (VTKWriter.getParameter< bool >("flag")){ + auto flagWriter = make_shared< field::VTKWriter< FlagField_T > >(ids.flagField, "flag"); + vtkOutput->addCellDataWriter(flagWriter); + } + timeloop.addFuncAfterTimeStep(vtk::writeFiles(vtkOutput), "VTK Output"); + } + + // log remaining time + if (uint_c(setup.remainingTimeLoggerFrequency) > 0){ + timeloop.addFuncAfterTimeStep( + timing::RemainingTimeLogger(timeloop.getNrOfTimeSteps(), setup.remainingTimeLoggerFrequency), + "remaining time logger"); + } + + auto CheckerParameters = config->getOneBlock("StabilityChecker"); + const uint_t checkFrequency = CheckerParameters.getParameter< uint_t >("checkFrequency", uint_t(0)); + if (checkFrequency > 0){ + auto checkFunction = [](VelocityField_T::value_type value) { return value < math::abs(VelocityField_T::value_type(10)); }; + timeloop.addFuncAfterTimeStep( + makeSharedFunctor(field::makeStabilityChecker< VelocityField_T, FlagField_T >( + config, blocks, ids.velocityField, ids.flagField, setup.fluidUID, checkFunction)), + "Stability check"); + } + + WALBERLA_MPI_BARRIER() +#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT) + WALBERLA_GPU_CHECK(gpuDeviceSynchronize()) + WALBERLA_GPU_CHECK(gpuPeekAtLastError()) +#endif + + const lbm_generated::PerformanceEvaluation< FlagField_T > performance(blocks, ids.flagField, setup.fluidUID); + field::CellCounter< FlagField_T > fluidCells(blocks, ids.flagField, setup.fluidUID); + fluidCells(); + + WALBERLA_LOG_INFO_ON_ROOT(++step << ". Starting Simulation with " << setup.timesteps << " timesteps") + WcTimingPool timeloopTiming; + WcTimer simTimer; + + WALBERLA_MPI_BARRIER() +#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT) + WALBERLA_GPU_CHECK(gpuDeviceSynchronize()) + WALBERLA_GPU_CHECK(gpuPeekAtLastError()) +#endif + + simTimer.start(); + timeloop.run(timeloopTiming); +#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT) + WALBERLA_GPU_CHECK(gpuDeviceSynchronize()) + WALBERLA_GPU_CHECK(gpuPeekAtLastError()) +#endif + WALBERLA_MPI_BARRIER() + simTimer.end(); + WALBERLA_LOG_INFO_ON_ROOT("Simulation finished") + double time = double_c(simTimer.max()); + WALBERLA_MPI_SECTION() { walberla::mpi::reduceInplace(time, walberla::mpi::MAX); } + performance.logResultOnRoot(setup.timesteps, time); + + const auto reducedTimeloopTiming = timeloopTiming.getReduced(); + WALBERLA_LOG_RESULT_ON_ROOT("Time loop timing:\n" << *reducedTimeloopTiming) + + printResidentMemoryStatistics(); + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/apps/showcases/Debug/Debug.py b/apps/showcases/Debug/Debug.py new file mode 100644 index 000000000..dfb101c55 --- /dev/null +++ b/apps/showcases/Debug/Debug.py @@ -0,0 +1,126 @@ +import sympy as sp +from pystencils import Target + +from pystencils.field import fields +from pystencils.simp import insert_aliases, insert_constants + +from lbmpy import LBStencil, LBMConfig, LBMOptimisation +from lbmpy.boundaries.boundaryconditions import NoSlip, NoSlipLinearBouzidi, ExtrapolationOutflow, UBB, FixedDensity +from lbmpy.creationfunctions import create_lb_collision_rule +from lbmpy.enums import Method, Stencil, ForceModel +from lbmpy.flow_statistics import welford_assignments + +from pystencils_walberla import CodeGeneration, generate_info_header, generate_sweep +from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator + +info_header = """ +#pragma once +#include <map> +std::map<std::string, std::string> infoMap{{{{"stencil", "{stencil}"}}, + {{"streamingPattern", "{streaming_pattern}"}}, + {{"collisionOperator", "{collision_operator}"}}}}; +""" + +omega = sp.symbols("omega") +inlet_velocity = sp.symbols("u_x") +force_vector = tuple([sp.Symbol("Fx"), 0.0, 0.0]) +max_threads = 256 + +with CodeGeneration() as ctx: + target = Target.CPU # Target.GPU if ctx.gpu else Target.CPU + name_prefix = "Debug" # "PorousMediaGPU" + sweep_params = {'block_size': (128, 1, 1)} if ctx.gpu else {} + + # The application is meant to be compiled with double precision. For single precision, the pdf_dtype can be switched + # to float32. In this way the calculations are still performed in double precision while the application can profit + # from enhanced performance due to the lower precision of the PDF field + dtype = 'float64' if ctx.double_accuracy else 'float32' + pdf_dtype = 'float64' + + stencil = LBStencil(Stencil.D3Q27) + q = stencil.Q + dim = stencil.D + + streaming_pattern = 'esotwist' + + pdfs = fields(f"pdfs({stencil.Q}): {pdf_dtype}[3D]", layout='fzyx') + velocity_field, density_field = fields(f"velocity({dim}), density(1) : {dtype}[{dim}D]", layout='fzyx') + mean_velocity = fields(f"mean_velocity({dim}): {dtype}[{stencil.D}D]", layout='fzyx') + + macroscopic_fields = {'density': density_field, 'velocity': velocity_field} + + # method_enum = Method.CENTRAL_MOMENT + method_enum = Method.SRT + lbm_config = LBMConfig( + method=method_enum, + stencil=stencil, + relaxation_rate=omega, + compressible=True, + force_model=ForceModel.GUO, + force=force_vector, + # subgrid_scale_model=SubgridScaleModel.QR, + fourth_order_correction=0.01 if method_enum == Method.CUMULANT and stencil.Q == 27 else False, + field_name='pdfs', + streaming_pattern=streaming_pattern + ) + + lbm_opt = LBMOptimisation(cse_global=False, cse_pdfs=False, field_layout="fzyx", + symbolic_field=pdfs) + + collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt) + if lbm_config.method == Method.CUMULANT: + collision_rule = insert_constants(collision_rule) + collision_rule = insert_aliases(collision_rule) + lb_method = collision_rule.method + + no_slip = lbm_boundary_generator(class_name='NoSlip', flag_uid='NoSlip', + boundary_object=NoSlip(), field_data_type=pdf_dtype) + + quadratic_bounce_back = NoSlipLinearBouzidi(calculate_force_on_boundary=True, data_type=dtype) + no_slip_interpolated = lbm_boundary_generator(class_name='Obstacle', flag_uid='Obstacle', + boundary_object=quadratic_bounce_back, + field_data_type=pdf_dtype) + + + turbulentubb = lbm_boundary_generator(class_name='TurbulentInflow', flag_uid='TurbulentInflow', + boundary_object=UBB(velocity=velocity_field.center_vector, density=1.0, data_type=dtype, dim=dim), + field_data_type=pdf_dtype) + + ubb = lbm_boundary_generator(class_name='UBB', flag_uid='UBB', + boundary_object=UBB(velocity=(inlet_velocity, 0.0, 0.0), density=1.0, data_type=dtype, dim=dim), + field_data_type=pdf_dtype) + + # outflow_boundary = ExtrapolationOutflow(stencil[4], lb_method, data_type=pdf_dtype, density=1.0) + outflow_boundary = ExtrapolationOutflow(stencil[4], lb_method, data_type=pdf_dtype) + outflow = lbm_boundary_generator(class_name='Outflow', flag_uid='Outflow', + boundary_object=outflow_boundary, + field_data_type=pdf_dtype) + + # outflow = lbm_boundary_generator(class_name='Outflow', flag_uid='Outflow', + # boundary_object=FixedDensity(1.0), + # field_data_type=pdf_dtype) + + generate_lbm_package(ctx, name=name_prefix, collision_rule=collision_rule, + lbm_config=lbm_config, lbm_optimisation=lbm_opt, + nonuniform=True, boundaries=[no_slip, no_slip_interpolated, ubb, turbulentubb, outflow], + macroscopic_fields=macroscopic_fields, gpu_indexing_params=sweep_params, + target=target, data_type=dtype, pdfs_data_type=pdf_dtype, + max_threads=max_threads) + + welford_update = welford_assignments(field=velocity_field, mean_field=mean_velocity) + generate_sweep(ctx, "Welford", welford_update, target=target) + + field_typedefs = {'VelocityField_T': velocity_field, + 'ScalarField_T': density_field} + + # Info header containing correct template definitions for stencil and field + generate_info_header(ctx, f'{name_prefix}InfoHeader', + field_typedefs=field_typedefs) + + infoHeaderParams = { + 'stencil': stencil.name.lower(), + 'streaming_pattern': streaming_pattern, + 'collision_operator': lbm_config.method.name.lower(), + } + + ctx.write_file(f"{name_prefix}StaticDefines.h", info_header.format(**infoHeaderParams)) \ No newline at end of file diff --git a/apps/showcases/PorousMediaGPU/CMakeLists.txt b/apps/showcases/PorousMediaGPU/CMakeLists.txt index 087cab7fb..9b7a4e28a 100644 --- a/apps/showcases/PorousMediaGPU/CMakeLists.txt +++ b/apps/showcases/PorousMediaGPU/CMakeLists.txt @@ -12,6 +12,7 @@ waLBerla_generate_target_from_python(NAME PorousMediaGPUCodeGen NoSlip.h NoSlip.${CODEGEN_FILE_SUFFIX} Obstacle.h Obstacle.${CODEGEN_FILE_SUFFIX} UBB.h UBB.${CODEGEN_FILE_SUFFIX} + TurbulentInflow.h TurbulentInflow.${CODEGEN_FILE_SUFFIX} Outflow.h Outflow.${CODEGEN_FILE_SUFFIX} Welford.h Welford.${CODEGEN_FILE_SUFFIX} PorousMediaGPUBoundaryCollection.h @@ -27,7 +28,19 @@ else() FILES PorousMediaGPU.cpp GridGeneration.h Setup.h Types.h DragEvaluation.cpp InitializerFunctions.cpp TPMS.cpp Inflow.cpp Probe.cpp util.cpp DEPENDS blockforest boundary core field io lbm_generated postprocessing stencil timeloop vtk PorousMediaGPUCodeGen ) -# target_include_directories(PorousMediaGPU PUBLIC ${HDF5_CXX_INCLUDE_DIRS}) -# target_link_libraries(PorousMediaGPU PUBLIC ${HDF5_LIBRARIES} ) - endif(WALBERLA_BUILD_WITH_CUDA OR WALBERLA_BUILD_WITH_HIP) + +waLBerla_generate_target_from_python(NAME DebugCodeGen + FILE Debug.py + CODEGEN_CFG debug_codegen + OUT_FILES DebugSweepCollection.h DebugSweepCollection.cpp + DebugStorageSpecification.h DebugStorageSpecification.cpp + NoSlip.h NoSlip.cpp + UBB.h UBB.cpp + DebugBoundaryCollection.h + DebugInfoHeader.h + DebugStaticDefines.h) + +waLBerla_add_executable ( NAME Debug + FILES Debug.cpp + DEPENDS blockforest boundary core field io lbm_generated postprocessing stencil timeloop vtk DebugCodeGen ) diff --git a/apps/showcases/PorousMediaGPU/Debug.cpp b/apps/showcases/PorousMediaGPU/Debug.cpp new file mode 100644 index 000000000..130cb9251 --- /dev/null +++ b/apps/showcases/PorousMediaGPU/Debug.cpp @@ -0,0 +1,468 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file PorousMediaGPU.cpp +//! \author Markus Holzer <markus.holzer@fau.de> +// +//====================================================================================================================== + +#include "blockforest/Initialization.h" +#include "blockforest/SetupBlock.h" +#include "blockforest/SetupBlockForest.h" +#include "blockforest/loadbalancing/StaticCurve.h" +#include "blockforest/StructuredBlockForest.h" +#include "blockforest/communication/NonUniformBufferedScheme.h" + +#include "core/Abort.h" +#include "core/DataTypes.h" +#include "core/SharedFunctor.h" +#include "core/debug/CheckFunctions.h" +#include "core/logging/Logging.h" +#include "core/math/Vector3.h" +#include "core/math/Constants.h" +#include "core/Environment.h" +#include "core/mpi/MPIManager.h" +#include "core/MemoryUsage.h" +#include "core/mpi/Reduce.h" +#include "core/timing/RemainingTimeLogger.h" +#include "core/timing/TimingPool.h" + +#include "field/AddToStorage.h" +#include "field/CellCounter.h" +#include "field/FlagField.h" +#include "field/StabilityChecker.h" +#include "field/adaptors/AdaptorCreators.h" +#include "field/iterators/FieldIterator.h" +#include "field/vtk/VTKWriter.h" +#include "field/vtk/FlagFieldCellFilter.h" + +#include "geometry/InitBoundaryHandling.h" +#include "geometry/mesh/TriangleMeshIO.h" + +#include "lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h" +#include "lbm_generated/evaluation/PerformanceEvaluation.h" +#include "lbm_generated/field/AddToStorage.h" +#include "lbm_generated/field/PdfField.h" +#include "lbm_generated/refinement/BasicRecursiveTimeStep.h" + +#include "timeloop/SweepTimeloop.h" +#include "vtk/VTKOutput.h" + +#include <cstdlib> +#include <iostream> +#include <memory> + +#include "DebugStaticDefines.h" +#include "DebugInfoHeader.h" + +using namespace walberla; + +using StorageSpecification_T = lbm::DebugStorageSpecification; +using Stencil_T = StorageSpecification_T::Stencil; +using CommunicationStencil_T = StorageSpecification_T::CommunicationStencil; + +using PdfField_T = lbm_generated::PdfField< StorageSpecification_T >; +using FlagField_T = FlagField< uint8_t >; + +using BoundaryCollection_T = lbm::DebugBoundaryCollection< FlagField_T >; +using SweepCollection_T = lbm::DebugSweepCollection; + + +using macroFieldType = VelocityField_T::value_type; +using RefinementSelectionFunction = std::function<void (SetupBlockForest &)>; + +struct IDs +{ + BlockDataID pdfField; + BlockDataID velocityField; + BlockDataID meanVelocityField; + BlockDataID velocityFieldInflowSlice; + BlockDataID densityField; + BlockDataID flagField; +}; + +void refineWalls(SetupBlockForest& forest, const real_t hemispheresRadius, const real_t dx, const uint_t level) +{ + auto domain = forest.getDomain(); + const real_t yMin = domain.yMin(); + const real_t yMax = domain.yMax(); + + const real_t t = hemispheresRadius + dx; + + const AABB minAABB(domain.xMin(), domain.yMin(), domain.zMin(), domain.xMax(), domain.yMin() + t, domain.zMax()); + // const AABB maxAABB(domain.xMin(), domain.yMax() - t, domain.zMin(), domain.xMax(), domain.yMax(), domain.zMax()); + + for (auto block = forest.begin(); block != forest.end(); ++block){ + const AABB& aabb = block->getAABB(); + + // if (block->getLevel() < level && (minAABB.intersects(aabb) || maxAABB.intersects(aabb)) ){ + if (block->getLevel() < level && minAABB.intersects(aabb) ){ + block->setMarker(true); + } + } +} + +void createSetupBlockForest(SetupBlockForest& setupBfs, std::shared_ptr<Config>& config, const RefinementSelectionFunction& refinementSelection, const bool useMPIManager=false) +{ + auto domainParameters = config->getOneBlock("DomainSetup"); + + WALBERLA_LOG_INFO_ON_ROOT("Generating SetupBlockForest...") + + uint_t numProcesses = domainParameters.getParameter< uint_t >("numberProcesses"); + + const std::string blockForestFilestem = "Debug"; + + if(useMPIManager) {numProcesses = uint_c(mpi::MPIManager::instance()->numProcesses());} + + // TPMSRefinementSelection tpmsRefinementSelection( setup ); + setupBfs.addRefinementSelectionFunction(refinementSelection); + + // SphereBlockExclusion SphereBlockExclusion( Sphere ); + // setupBfs.addBlockExclusionFunction(SphereBlockExclusion); + + Vector3< real_t > domainSize = domainParameters.getParameter< Vector3< real_t > >("domainSize"); + Vector3< uint_t > rootBlocks = domainParameters.getParameter< Vector3< uint_t > >("blocks"); + Vector3< bool > periodic = domainParameters.getParameter< Vector3< bool > >("periodic"); + uint_t refinementLevels = domainParameters.getParameter< uint_t >("refinementLevels"); + + const AABB domain(real_c(0.0), real_c(0.0), real_c(0.0), domainSize[0], domainSize[1], domainSize[2]); + setupBfs.addWorkloadMemorySUIDAssignmentFunction(blockforest::uniformWorkloadAndMemoryAssignment); + setupBfs.init(domain, rootBlocks[0], rootBlocks[1], rootBlocks[2], + periodic[0], periodic[1], periodic[2]); + setupBfs.balanceLoad(blockforest::StaticLevelwiseCurveBalanceWeighted(), numProcesses); + + WALBERLA_LOG_INFO_ON_ROOT("=========================== BLOCK FOREST STATISTICS ============================"); + WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << setupBfs.getNumberOfBlocks()) + for (uint_t level = 0; level <= refinementLevels; level++){ + const uint_t numberOfBlocks = setupBfs.getNumberOfBlocks(level); + WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << numberOfBlocks) + } + + const real_t avgBlocksPerProc = real_c(setupBfs.getNumberOfBlocks()) / real_c(setupBfs.getNumberOfProcesses()); + WALBERLA_LOG_INFO_ON_ROOT("Average blocks per process: " << avgBlocksPerProc); + + Vector3< uint_t > c = domainParameters.getParameter< Vector3< uint_t > >("cellsPerBlock"); + + const uint_t cellsPerBlock = c[0] * c[1] * c[2];; + const uint_t totalNumberCells = setupBfs.getNumberOfBlocks() * cellsPerBlock; + const real_t averageCellsPerGPU = avgBlocksPerProc * real_c(cellsPerBlock); + + const uint_t PDFsPerCell = StorageSpecification_T::inplace ? Stencil_T::Q : 2 * Stencil_T::Q; + const uint_t valuesPerCell = (PDFsPerCell + VelocityField_T::F_SIZE + ScalarField_T::F_SIZE); + const uint_t sizePerValue = sizeof(StorageSpecification_T::value_type); + const double expectedMemory = double_c(totalNumberCells * valuesPerCell * sizePerValue) * 1e-9; + + WALBERLA_LOG_INFO_ON_ROOT( "Total number of cells will be " << totalNumberCells << " fluid cells (in total on all levels)") + WALBERLA_LOG_INFO_ON_ROOT( "Expected total memory demand will be " << expectedMemory << " GB") + + WALBERLA_LOG_INFO_ON_ROOT("=================================================================================") + + if(mpi::MPIManager::instance()->numProcesses() > 1) + return; + + auto logging = config->getOneBlock("Logging"); + bool writeSetupForestAndReturn = logging.getParameter< bool >("writeSetupForestAndReturn", false); + + if(writeSetupForestAndReturn){ + std::ostringstream oss; + oss << blockForestFilestem << ".bfs"; + setupBfs.saveToFile(oss.str().c_str()); + + setupBfs.writeVTKOutput(blockForestFilestem); + } +} + +void createBlockForest(shared_ptr< BlockForest >& bfs, std::shared_ptr<Config>& config, const RefinementSelectionFunction& refinementSelection){ + if (mpi::MPIManager::instance()->numProcesses() > 1){ + // Load structured block forest from file + std::ostringstream oss; + oss << "debug" << ".bfs"; + const std::string setupBlockForestFilepath = oss.str(); + std::ifstream infile(setupBlockForestFilepath.c_str()); + if(!infile.good()){ + WALBERLA_LOG_WARNING_ON_ROOT("Blockforest was not created beforehand and thus needs to be created on the fly. For large simulation runs this can be a severe problem!") + SetupBlockForest setupBfs; + createSetupBlockForest(setupBfs, config, refinementSelection, true); + bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs); + } + else{ + bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), + setupBlockForestFilepath.c_str(), false); + } + } + else{ + SetupBlockForest setupBfs; + createSetupBlockForest(setupBfs, config, refinementSelection); + bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs); + } +} + + + +int main(int argc, char** argv){ + Environment env( argc, argv ); + mpi::MPIManager::instance()->useWorldComm(); + + auto config = env.config(); + + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /// SETUP AND CONFIGURATION /// + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + auto parameters = config->getOneBlock("Parameters"); + auto loggingParameters = config->getOneBlock("Logging"); + auto boundariesConfig = config->getBlock("Boundaries"); + auto domainParameters = config->getOneBlock("DomainSetup"); + + + bool writeSetupForestAndReturn = loggingParameters.getParameter< bool >("writeSetupForestAndReturn", false); + if (uint_c(MPIManager::instance()->numProcesses()) > 1) writeSetupForestAndReturn = false; + shared_ptr< BlockForest > bfs; + + Vector3< real_t > domainSize = domainParameters.getParameter< Vector3< real_t > >("domainSize"); + Vector3< uint_t > cellsPerBlock = domainParameters.getParameter< Vector3< uint_t > >("cellsPerBlock"); + Vector3< uint_t > rootBlocks = domainParameters.getParameter< Vector3< uint_t > >("blocks"); + uint_t refinementLevels = domainParameters.getParameter< uint_t >("refinementLevels"); + uint_t timesteps = parameters.getParameter< uint_t >("timesteps"); + + const real_t dx = domainSize[0] / real_c(rootBlocks[0] * cellsPerBlock[0]); + WALBERLA_CHECK_FLOAT_EQUAL_EPSILON(dx, domainSize[1] / real_c(rootBlocks[1] * cellsPerBlock[1]), real_c(1e-12), "Resulting resolution is not equal in x and y direxction. This is not supported"); + WALBERLA_CHECK_FLOAT_EQUAL_EPSILON(dx, domainSize[2] / real_c(rootBlocks[2] * cellsPerBlock[2]), real_c(1e-12), "Resulting resolution is not equal in x and z direxction. This is not supported"); + + auto dxF = dx / real_c( 1 << refinementLevels ); + auto refinementSelection = std::bind(refineWalls, std::placeholders::_1, real_c(1.0), dxF, refinementLevels); + + createBlockForest(bfs, config, refinementSelection); + + if (writeSetupForestAndReturn && mpi::MPIManager::instance()->numProcesses() == 1){ + WALBERLA_LOG_INFO_ON_ROOT("BlockForest has been created and writen to file. Returning program") + logging::Logging::printFooterOnStream(); + return EXIT_SUCCESS; + } + + auto blocks = std::make_shared< StructuredBlockForest >(bfs, cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2]); + blocks->createCellBoundingBoxes(); + auto finalDomain = blocks->getDomain(); + + const StorageSpecification_T StorageSpec = StorageSpecification_T(); + // Creating fields + uint_t step = 0; + WALBERLA_LOG_INFO_ON_ROOT(++step << ". Allocating data") + IDs ids; + + ids.pdfField = lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, uint_c(2), field::fzyx); + ids.velocityField = field::addToStorage< VelocityField_T >(blocks, "vel", macroFieldType(0.0), field::fzyx, uint_c(2)); + ids.meanVelocityField = field::addToStorage< VelocityField_T >(blocks, "mean velocity", macroFieldType(0.0), field::fzyx, uint_c(2)); + ids.densityField = field::addToStorage< ScalarField_T >(blocks, "density", macroFieldType(1.0), field::fzyx, uint_c(2)); + ids.flagField = field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field", uint_c(2)); + + const real_t drivingForce = 1e-6; + const real_t omega = 1.970443349753695; + const real_t latticeVelocity = 0.05; + + const Cell innerOuterSplit = Cell(parameters.getParameter< Vector3< cell_idx_t > >("innerOuterSplit", Vector3< cell_idx_t >(1, 1, 1))); + SweepCollection_T sweepCollection(blocks, ids.pdfField, ids.densityField, ids.velocityField, drivingForce, omega, innerOuterSplit); + + WALBERLA_LOG_INFO_ON_ROOT(++step << ". Initialising data") + for (auto& block : *blocks){ + sweepCollection.initialise(&block, cell_idx_c(1)); + } + + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /// LB SWEEPS AND BOUNDARY HANDLING /// + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + auto fluidUID = FlagUID("Fluid"); + auto obstacleUID = FlagUID("Obstacle"); + + // Boundaries + WALBERLA_LOG_INFO_ON_ROOT(++step << ". Setting boundary conditions") + + + geometry::initBoundaryHandling< FlagField_T >(*blocks, ids.flagField, boundariesConfig); + + geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, ids.flagField, fluidUID, 2); + BoundaryCollection_T boundaryCollection(blocks, ids.flagField, ids.pdfField, fluidUID, latticeVelocity); + + + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /// COMMUNICATION SCHEME /// + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + auto communication = std::make_shared< NonUniformBufferedScheme< CommunicationStencil_T > >(blocks); + auto packInfo = lbm_generated::setupNonuniformPdfCommunication< PdfField_T >(blocks, ids.pdfField); + communication->addPackInfo(packInfo); + + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /// TIME STEP DEFINITIONS /// + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + SweepTimeloop timeloop(blocks->getBlockStorage(), timesteps); + + lbm_generated::BasicRecursiveTimeStep< PdfField_T, SweepCollection_T, BoundaryCollection_T > LBMMeshRefinement( + blocks, ids.pdfField, sweepCollection, boundaryCollection, communication, packInfo); +/* + for (auto& block : *blocks){ + const auto pdfs = block.getData< PdfField_T >(ids.pdfField); + real_t level = real_c(blocks->getLevel(block)); + for( auto it = pdfs->beginWithGhostLayer(2); it != pdfs->end(); ++it ){ + for(uint_t i = 0; i < Stencil_T::Q; ++i){ + pdfs->get(it.x(), it.y(), it.z(), i) = real_c(Stencil_T::Q)*level + real_c(i); + } + } + } + + return 0; + + for (auto& block : *blocks){ + const auto pdfs = block.getData< PdfField_T >(ids.pdfField); + auto level = blocks->getLevel(block); + + if(level == 0){ + for( auto it = pdfs->beginGhostLayerOnly(stencil::Direction::S); it != pdfs->end(); ++it ){ + + WALBERLA_LOG_DEVEL_VAR(it) + } + } + + } + + return 0; + +// LBMMeshRefinement.addRefinementToTimeLoop(timeloop); +// timeloop.addFuncAfterTimeStep(LBMMeshRefinement, "test"); +// timeloop.run(); +// WALBERLA_LOG_INFO_ON_ROOT("test") + +for(auto i = 0; i < 50; ++i){ + for (auto& block : *blocks){ + boundaryCollection(&block); + } +} +*/ +// for(auto i = 0; i < 50; ++i) +// LBMMeshRefinement(); + +// return 0; + + // timeloop.addFuncBeforeTimeStep(LBMMeshRefinement, "Refinement timestep"); + LBMMeshRefinement.addRefinementToTimeLoop(timeloop); + + const AABB sliceXY(finalDomain.xMin(), finalDomain.yMin(), finalDomain.center()[2] - dxF, + finalDomain.xMax(), finalDomain.yMax(), finalDomain.center()[2] + dxF); + + + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /// VTK Output /// + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + // VTtimeloop.addFuncAfterTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");K + auto VTKWriter = config->getOneBlock("VTKWriter"); + const uint_t vtkWriteFrequency = VTKWriter.getParameter< uint_t >("vtkWriteFrequency", 0); + const real_t sliceThickness = VTKWriter.getParameter< real_t >("sliceThickness", real_c(1.0)); + + if (vtkWriteFrequency > 0){ + const std::string vtkFolder = "debug"; + auto vtkOutput = + vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, vtkFolder, + "simulation_step", false, true, true, false, 0, false, false); + + + vtkOutput->addBeforeFunction([&]() { + for (auto& block : *blocks){ + sweepCollection.calculateMacroscopicParameters(&block); + } + }); + + + field::FlagFieldCellFilter<FlagField_T> fluidFilter( ids.flagField ); + fluidFilter.addFlag(obstacleUID ); + vtkOutput->addCellExclusionFilter(fluidFilter); + + + if (VTKWriter.getParameter< bool >("writeOnlySlice", true)){ + // const AABB sliceXY(finalDomain.xMin(), finalDomain.yMin(), finalDomain.center()[2] - sliceThickness * setup.dxF, + // finalDomain.xMax(), finalDomain.yMax(), finalDomain.center()[2] + sliceThickness * setup.dxF); + vtkOutput->addCellInclusionFilter(vtk::AABBCellFilter(sliceXY)); + + if (VTKWriter.getParameter< bool >("writeXZSlice", false)){ + const AABB sliceXZ(finalDomain.xMin(), finalDomain.center()[1] - dxF, finalDomain.zMin(), + finalDomain.xMax(), finalDomain.center()[1] + dxF, finalDomain.zMax()); + vtkOutput->addCellInclusionFilter(vtk::AABBCellFilter(sliceXZ)); + } + } + + if (VTKWriter.getParameter< bool >("velocity")){ + if (VTKWriter.getParameter< bool >("writeVelocityAsMagnitude")){ + auto velWriter = make_shared< field::VTKMagnitudeWriter< VelocityField_T, float32 > >(ids.velocityField, "velocityMagnitude"); + vtkOutput->addCellDataWriter(velWriter); + } + else{ + auto velWriter = make_shared< field::VTKWriter< VelocityField_T, float32 > >(ids.velocityField, "velocity"); + vtkOutput->addCellDataWriter(velWriter); + } + } + if (VTKWriter.getParameter< bool >("density")){ + auto densityWriter = make_shared< field::VTKWriter< ScalarField_T, float32 > >(ids.densityField, "density"); + vtkOutput->addCellDataWriter(densityWriter); + } + if (VTKWriter.getParameter< bool >("flag")){ + auto flagWriter = make_shared< field::VTKWriter< FlagField_T > >(ids.flagField, "flag"); + vtkOutput->addCellDataWriter(flagWriter); + } + timeloop.addFuncAfterTimeStep(vtk::writeFiles(vtkOutput), "VTK Output"); + } + + // log remaining time + timeloop.addFuncAfterTimeStep( + timing::RemainingTimeLogger(timeloop.getNrOfTimeSteps(), 20), + "remaining time logger"); + + auto CheckerParameters = config->getOneBlock("StabilityChecker"); + const uint_t checkFrequency = CheckerParameters.getParameter< uint_t >("checkFrequency", uint_t(0)); + if (checkFrequency > 0){ + auto checkFunction = [](VelocityField_T::value_type value) { return value < math::abs(VelocityField_T::value_type(10)); }; + timeloop.addFuncAfterTimeStep( + makeSharedFunctor(field::makeStabilityChecker< VelocityField_T, FlagField_T >( + config, blocks, ids.velocityField, ids.flagField, fluidUID, checkFunction)), + "Stability check"); + } + + WALBERLA_MPI_BARRIER() + + const lbm_generated::PerformanceEvaluation< FlagField_T > performance(blocks, ids.flagField, fluidUID); + field::CellCounter< FlagField_T > fluidCells(blocks, ids.flagField, fluidUID); + fluidCells(); + + WALBERLA_LOG_INFO_ON_ROOT(++step << ". Starting Simulation with " << timesteps << " timesteps") + WcTimingPool timeloopTiming; + WcTimer simTimer; + + WALBERLA_MPI_BARRIER() + + simTimer.start(); + timeloop.run(timeloopTiming); + WALBERLA_MPI_BARRIER() + simTimer.end(); + WALBERLA_LOG_INFO_ON_ROOT("Simulation finished") + double time = double_c(simTimer.max()); + WALBERLA_MPI_SECTION() { walberla::mpi::reduceInplace(time, walberla::mpi::MAX); } + performance.logResultOnRoot(timesteps, time); + + const auto reducedTimeloopTiming = timeloopTiming.getReduced(); + WALBERLA_LOG_RESULT_ON_ROOT("Time loop timing:\n" << *reducedTimeloopTiming) + + printResidentMemoryStatistics(); + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/apps/showcases/PorousMediaGPU/Debug.py b/apps/showcases/PorousMediaGPU/Debug.py new file mode 100644 index 000000000..cb1bc609a --- /dev/null +++ b/apps/showcases/PorousMediaGPU/Debug.py @@ -0,0 +1,127 @@ +import sympy as sp +from pystencils import Target + +from pystencils.field import fields +from pystencils.simp import insert_aliases, insert_constants + +from lbmpy import LBStencil, LBMConfig, LBMOptimisation +from lbmpy.boundaries.boundaryconditions import NoSlip, NoSlipLinearBouzidi, ExtrapolationOutflow, UBB, FixedDensity +from lbmpy.creationfunctions import create_lb_collision_rule +from lbmpy.enums import Method, Stencil, ForceModel +from lbmpy.flow_statistics import welford_assignments + +from pystencils_walberla import CodeGeneration, generate_info_header, generate_sweep +from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator +from lbmpy_walberla import RefinementScaling + +info_header = """ +#pragma once +#include <map> +std::map<std::string, std::string> infoMap{{{{"stencil", "{stencil}"}}, + {{"streamingPattern", "{streaming_pattern}"}}, + {{"collisionOperator", "{collision_operator}"}}}}; +""" + +omega = sp.symbols("omega") +inlet_velocity = sp.symbols("u_x") +force_vector = tuple([sp.Symbol("Fx"), 0.0, 0.0]) +max_threads = 256 + +with CodeGeneration() as ctx: + target = Target.CPU # Target.GPU if ctx.gpu else Target.CPU + name_prefix = "Debug" # "PorousMediaGPU" + sweep_params = {'block_size': (128, 1, 1)} if ctx.gpu else {} + + # The application is meant to be compiled with double precision. For single precision, the pdf_dtype can be switched + # to float32. In this way the calculations are still performed in double precision while the application can profit + # from enhanced performance due to the lower precision of the PDF field + dtype = 'float64' if ctx.double_accuracy else 'float32' + pdf_dtype = 'float64' + + stencil = LBStencil(Stencil.D3Q19) + q = stencil.Q + dim = stencil.D + + streaming_pattern = 'aa' + + pdfs = fields(f"pdfs({stencil.Q}): {pdf_dtype}[3D]", layout='fzyx') + velocity_field, density_field = fields(f"velocity({dim}), density(1) : {dtype}[{dim}D]", layout='fzyx') + mean_velocity = fields(f"mean_velocity({dim}): {dtype}[{stencil.D}D]", layout='fzyx') + + macroscopic_fields = {'density': density_field, 'velocity': velocity_field} + + # method_enum = Method.CENTRAL_MOMENT + method_enum = Method.SRT + lbm_config = LBMConfig( + method=method_enum, + stencil=stencil, + relaxation_rate=omega, + compressible=True, + force=force_vector, + # subgrid_scale_model=SubgridScaleModel.QR, + fourth_order_correction=0.01 if method_enum == Method.CUMULANT and stencil.Q == 27 else False, + field_name='pdfs', + streaming_pattern=streaming_pattern + ) + + lbm_opt = LBMOptimisation(cse_global=False, cse_pdfs=False, field_layout="fzyx", + symbolic_field=pdfs) + + collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt) + if lbm_config.method == Method.CUMULANT: + collision_rule = insert_constants(collision_rule) + collision_rule = insert_aliases(collision_rule) + lb_method = collision_rule.method + + no_slip = lbm_boundary_generator(class_name='NoSlip', flag_uid='NoSlip', + boundary_object=NoSlip(), field_data_type=pdf_dtype) + + quadratic_bounce_back = NoSlipLinearBouzidi(calculate_force_on_boundary=True, data_type=dtype) + no_slip_interpolated = lbm_boundary_generator(class_name='Obstacle', flag_uid='Obstacle', + boundary_object=quadratic_bounce_back, + field_data_type=pdf_dtype) + + + turbulentubb = lbm_boundary_generator(class_name='TurbulentInflow', flag_uid='TurbulentInflow', + boundary_object=UBB(velocity=velocity_field.center_vector, density=1.0, data_type=dtype, dim=dim), + field_data_type=pdf_dtype) + + ubb = lbm_boundary_generator(class_name='UBB', flag_uid='UBB', + boundary_object=UBB(velocity=(inlet_velocity, 0.0, 0.0), density=1.0, data_type=dtype, dim=dim), + field_data_type=pdf_dtype) + + # outflow_boundary = ExtrapolationOutflow(stencil[4], lb_method, data_type=pdf_dtype, density=1.0) + outflow_boundary = ExtrapolationOutflow(stencil[4], lb_method, data_type=pdf_dtype) + outflow = lbm_boundary_generator(class_name='Outflow', flag_uid='Outflow', + boundary_object=outflow_boundary, + field_data_type=pdf_dtype) + + # outflow = lbm_boundary_generator(class_name='Outflow', flag_uid='Outflow', + # boundary_object=FixedDensity(1.0), + # field_data_type=pdf_dtype) + + refinement_scaling = RefinementScaling() + refinement_scaling.add_standard_relaxation_rate_scaling(omega) + refinement_scaling.add_force_scaling(sp.Symbol("Fx")) + + generate_lbm_package(ctx, name=name_prefix, collision_rule=collision_rule, + lbm_config=lbm_config, lbm_optimisation=lbm_opt, + nonuniform=True, refinement_scaling=refinement_scaling, boundaries=[no_slip, ubb], + macroscopic_fields=macroscopic_fields, gpu_indexing_params=sweep_params, + target=target, data_type=dtype, pdfs_data_type=pdf_dtype, + max_threads=max_threads) + + field_typedefs = {'VelocityField_T': velocity_field, + 'ScalarField_T': density_field} + + # Info header containing correct template definitions for stencil and field + generate_info_header(ctx, f'{name_prefix}InfoHeader', + field_typedefs=field_typedefs) + + infoHeaderParams = { + 'stencil': stencil.name.lower(), + 'streaming_pattern': streaming_pattern, + 'collision_operator': lbm_config.method.name.lower(), + } + + ctx.write_file(f"{name_prefix}StaticDefines.h", info_header.format(**infoHeaderParams)) \ No newline at end of file diff --git a/apps/showcases/PorousMediaGPU/DragEvaluation.cpp b/apps/showcases/PorousMediaGPU/DragEvaluation.cpp index d8107e6df..624f466ea 100644 --- a/apps/showcases/PorousMediaGPU/DragEvaluation.cpp +++ b/apps/showcases/PorousMediaGPU/DragEvaluation.cpp @@ -27,20 +27,27 @@ DragEvaluation::DragEvaluation(std::shared_ptr< StructuredBlockForest >& blocks, const IDs& ids, const Setup& setup, std::shared_ptr<Config>& config) : blocks_(blocks), boundaryCollection_(boundaryCollection), ids_(ids), setup_(setup) { + auto parameters = config->getOneBlock("DragEvaluation"); + auto wallParameters = config->getOneBlock("RoughWall"); + + // if(parameters.getParameter< uint_t >("evaluationFrequency") == uint_c(0)) + // return; + quantities_ = {{"timestep", 0}, {"Fx", 1}, {"Fy", 2}, {"Fz", 3}, {"CDReal", 4}, {"CLReal", 5}}; WALBERLA_ASSERT_NOT_NULLPTR(blocks) maxLevel_ = blocks->getDepth(); blocks->getBlocks(finestBlocks_, maxLevel_); - auto parameters = config->getOneBlock("DragEvaluation"); - auto wallParameters = config->getOneBlock("RoughWall"); - dragFilename_ = parameters.getParameter< std::string >("filename"); checkFrequency_ = parameters.getParameter< uint_t >("evaluationFrequency"); writeCounter_ = 1000 * checkFrequency_; dt_ = setup.dt / real_c( 1 << setup.refinementLevels ); timesteps_ = setup.timesteps * ( 1 << setup.refinementLevels ); + totalExecutionCounter_ = 0; + + if(writeCounter_ == uint_c(0)) + writeCounter_++; const real_t radius = wallParameters.getParameter< real_t >("hemispheresRadius"); const real_t distance = wallParameters.getParameter< real_t >("hemispheresDistance"); @@ -103,8 +110,7 @@ DragEvaluation::DragEvaluation(std::shared_ptr< StructuredBlockForest >& blocks, surfaceAreaWall_ -= totalCircleArea; WALBERLA_CHECK_GREATER_EQUAL(surfaceAreaWall_, real_c(0.0)) - // TODO - meanVelocity_ = 1.0; + meanVelocity_ = setup.latticeVelocity; WALBERLA_LOG_INFO_ON_ROOT( "Evaluation initialised:" @@ -116,11 +122,13 @@ DragEvaluation::DragEvaluation(std::shared_ptr< StructuredBlockForest >& blocks, void DragEvaluation::forceCalculation(const uint_t level) { - if (checkFrequency_ == uint_c(0)) + if(level == maxLevel_){ + ++totalExecutionCounter_; + } + if (checkFrequency_ == uint_c(0) || (totalExecutionCounter_ % checkFrequency_) != uint_c(0)){ return; - - ++totalExecutionCounter_; - + } + if(level == maxLevel_){ for (auto b : finestBlocks_){ force_ += Vector3<double>(boundaryCollection_.ObstacleObject->getForce(b)); @@ -128,12 +136,11 @@ void DragEvaluation::forceCalculation(const uint_t level) mpi::reduceInplace(force_, mpi::SUM); WALBERLA_ROOT_SECTION(){ - const double t = double_c(totalExecutionCounter_) * dt_; - + const double t = double_c(totalExecutionCounter_ - 1) * dt_; const double meanU2 = double_c(meanVelocity_) * double_c(meanVelocity_); - const double cDRealArea = (double_c(8.0) * force_[0]) / (meanU2 * double_c(surfaceAreaHemiSpheres_)); - const double cLRealArea = (double_c(8.0) * force_[1]) / (meanU2 * double_c(surfaceAreaHemiSpheres_)); + const double cDRealArea = (double_c(2.0) * force_[0]) / (meanU2 * double_c(surfaceAreaHemiSpheres_)); + const double cLRealArea = (double_c(2.0) * force_[1]) / (meanU2 * double_c(surfaceAreaHemiSpheres_)); dragResults_[quantities_["timestep"]*writeCounter_ + executionCounter_] = float_c(t); dragResults_[quantities_["Fx"]*writeCounter_ + executionCounter_] = float_c(force_[0]); @@ -142,14 +149,15 @@ void DragEvaluation::forceCalculation(const uint_t level) dragResults_[quantities_["CDReal"]*writeCounter_ + executionCounter_] = float_c(cDRealArea); dragResults_[quantities_["CLReal"]*writeCounter_ + executionCounter_] = float_c(cLRealArea); } - } - force_[0] = double_c(0.0); - force_[1] = double_c(0.0); - force_[2] = double_c(0.0); - ++executionCounter_; - if(executionCounter_ >= writeCounter_ || totalExecutionCounter_ == timesteps_){ - writeResults(); + force_[0] = double_c(0.0); + force_[1] = double_c(0.0); + force_[2] = double_c(0.0); + + ++executionCounter_; + if(executionCounter_ >= writeCounter_ || totalExecutionCounter_ == timesteps_){ + writeResults(); + } } } diff --git a/apps/showcases/PorousMediaGPU/GridGeneration.h b/apps/showcases/PorousMediaGPU/GridGeneration.h index 53e67e02d..0dbafcbd2 100644 --- a/apps/showcases/PorousMediaGPU/GridGeneration.h +++ b/apps/showcases/PorousMediaGPU/GridGeneration.h @@ -52,8 +52,17 @@ uint_t blockCells(const Setup& setup, const bool withGhostLayers){ return cells; } +using RefinementSelectionFunction = std::function<void (SetupBlockForest &)>; -void createSetupBlockForest(SetupBlockForest& setupBfs, const Setup& setup, const bool useMPIManager=false) +void noRefinement(SetupBlockForest &) +{ + return; +} + +RefinementSelectionFunction defaultRefinement = noRefinement; + + +void createSetupBlockForest(SetupBlockForest& setupBfs, const Setup& setup, const RefinementSelectionFunction& refinementSelection, const bool useMPIManager=false) { WALBERLA_LOG_INFO_ON_ROOT("Generating SetupBlockForest...") @@ -63,7 +72,7 @@ void createSetupBlockForest(SetupBlockForest& setupBfs, const Setup& setup, cons if(useMPIManager) {numProcesses = uint_c(mpi::MPIManager::instance()->numProcesses());} // TPMSRefinementSelection tpmsRefinementSelection( setup ); - // setupBfs.addRefinementSelectionFunction(std::function<void(SetupBlockForest &)>(tpmsRefinementSelection)); + setupBfs.addRefinementSelectionFunction(refinementSelection); // SphereBlockExclusion SphereBlockExclusion( Sphere ); // setupBfs.addBlockExclusionFunction(SphereBlockExclusion); @@ -119,7 +128,7 @@ void createSetupBlockForest(SetupBlockForest& setupBfs, const Setup& setup, cons } } -void createBlockForest(shared_ptr< BlockForest >& bfs, const Setup& setup){ +void createBlockForest(shared_ptr< BlockForest >& bfs, const Setup& setup, const RefinementSelectionFunction& refinementSelection=defaultRefinement){ if (mpi::MPIManager::instance()->numProcesses() > 1){ // Load structured block forest from file std::ostringstream oss; @@ -129,7 +138,7 @@ void createBlockForest(shared_ptr< BlockForest >& bfs, const Setup& setup){ if(!infile.good()){ WALBERLA_LOG_WARNING_ON_ROOT("Blockforest was not created beforehand and thus needs to be created on the fly. For large simulation runs this can be a severe problem!") SetupBlockForest setupBfs; - createSetupBlockForest(setupBfs, setup, true); + createSetupBlockForest(setupBfs, setup, refinementSelection, true); bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs); } else{ @@ -139,7 +148,7 @@ void createBlockForest(shared_ptr< BlockForest >& bfs, const Setup& setup){ } else{ SetupBlockForest setupBfs; - createSetupBlockForest(setupBfs, setup); + createSetupBlockForest(setupBfs, setup, refinementSelection); bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs); } } \ No newline at end of file diff --git a/apps/showcases/PorousMediaGPU/InitializerFunctions.cpp b/apps/showcases/PorousMediaGPU/InitializerFunctions.cpp index 73a62abc2..3db223d20 100644 --- a/apps/showcases/PorousMediaGPU/InitializerFunctions.cpp +++ b/apps/showcases/PorousMediaGPU/InitializerFunctions.cpp @@ -74,24 +74,23 @@ void initHemisphers(const shared_ptr< StructuredBlockStorage >& blocks, const Bl auto domain = blocks->getDomain(); const real_t yMin = domain.yMin(); const real_t yMax = domain.yMax(); - - for (auto& block : *blocks){ - const auto flagField = block.getData< FlagField_T >(flagFieldID); + + for (auto bIt = blocks->begin(); bIt != blocks->end(); ++bIt){ + Block& b = dynamic_cast< Block& >(*bIt); + const auto flagField = b.getData< FlagField_T >(flagFieldID); const auto boundaryFlag = flagField->getOrRegisterFlag(setup.obstacleUID); - - WALBERLA_FOR_ALL_CELLS_XYZ(flagField, Cell globalCell; - blocks->transformBlockLocalToGlobalCell(globalCell, block, Cell(x, y, z)); - Vector3<real_t> p; - blocks->getCellCenter(p, globalCell, 0); - if(p[1] > radius && p[1] < yMax - radius){ + + for( auto it = flagField->beginWithGhostLayer(1); it != flagField->end(); ++it ){ + Vector3< real_t > cellCenter = blocks->getBlockLocalCellCenter( b, it.cell() ); + blocks->mapToPeriodicDomain(cellCenter); + if(cellCenter[1] > radius && cellCenter[1] < yMax - radius){ continue; } - - auto center = getCenterOfHemisphere(p, radius, hemispheresDistance, yMin, yMax); - if(hemispheresContains(p, center, radius)) { - addFlag(flagField->get(x, y, z), boundaryFlag); + auto center = getCenterOfHemisphere(cellCenter, radius, hemispheresDistance, yMin, yMax); + if(hemispheresContains(cellCenter, center, radius)) { + addFlag(flagField->get(it.x(), it.y(), it.z()), boundaryFlag); } - ) + } } } diff --git a/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp b/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp index 7d32b0352..84b9d75a4 100644 --- a/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp +++ b/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp @@ -169,6 +169,27 @@ real_t defaultWallDistance(const Cell& /*fluidCell*/, const Cell& /*boundaryCell return real_c(0.5); } +void refineWalls(SetupBlockForest& forest, const real_t hemispheresRadius, const real_t dx, const uint_t level) +{ + auto domain = forest.getDomain(); + const real_t yMin = domain.yMin(); + const real_t yMax = domain.yMax(); + + const real_t t = hemispheresRadius + dx; + + const AABB minAABB(domain.xMin(), domain.yMin(), domain.zMin(), domain.xMax(), domain.yMin() + t, domain.zMax()); + const AABB maxAABB(domain.xMin(), domain.yMax() - t, domain.zMin(), domain.xMax(), domain.yMax(), domain.zMax()); + + for (auto block = forest.begin(); block != forest.end(); ++block){ + const AABB& aabb = block->getAABB(); + + if (block->getLevel() < level && (minAABB.intersects(aabb) || maxAABB.intersects(aabb)) ){ + //if (block->getLevel() < level && minAABB.intersects(aabb) ){ + block->setMarker(true); + } + } +} + int main(int argc, char** argv){ @@ -195,17 +216,18 @@ int main(int argc, char** argv){ "\n + stencil: " << setup.stencil << "\n + streaming: " << setup.streamingPattern << "\n + compressible: " << ( StorageSpecification_T::compressible ? "yes" : "no" ) << - "\n + resolution: " << setup.dxC << " - on the coarsest grid" << "\n- simulation properties: " "\n + kin. viscosity: " << setup.kinViscosity << " [m^2/s] (on the coarsest grid)" << "\n + viscosity LB: " << setup.viscosity << " [dx*dx/dt] (on the coarsest grid)" << "\n + omega: " << setup.omega << " (on the coarsest grid)" << + "\n + omega: " << setup.omegaF << " (on the finest grid)" << "\n + inflow velocity: " << setup.referenceVelocity << " [m/s]" << "\n + lattice velocity: " << setup.latticeVelocity << " [dx/dt]" << "\n + Reynolds number: " << setup.reynoldsNumber << "\n + Mach number: " << setup.machNumber << "\n + driving force: " << setup.drivingForce << "\n + dx (coarsest grid): " << setup.dxC << " [m]" << + "\n + dx (finest grid): " << setup.dxF << " [m]" << "\n + dt (coarsest grid): " << setup.dt << " [s]" << "\n + conversion Faktor: " << setup.conversionFactor << "\n + #time steps: " << setup.timesteps << " (on the coarsest grid, " << ( real_c(1.0) / setup.dt ) << " for 1s of real time)" << @@ -214,7 +236,10 @@ int main(int argc, char** argv){ bool writeSetupForestAndReturn = loggingParameters.getParameter< bool >("writeSetupForestAndReturn", false); if (uint_c(MPIManager::instance()->numProcesses()) > 1) writeSetupForestAndReturn = false; shared_ptr< BlockForest > bfs; - createBlockForest(bfs, setup); + + auto refinementSelection = std::bind(refineWalls, _1, real_c(1.0), setup.dxF, setup.refinementLevels); + + createBlockForest(bfs, setup, refinementSelection); if (writeSetupForestAndReturn && mpi::MPIManager::instance()->numProcesses() == 1){ WALBERLA_LOG_INFO_ON_ROOT("BlockForest has been created and writen to file. Returning program") @@ -224,28 +249,28 @@ int main(int argc, char** argv){ auto blocks = std::make_shared< StructuredBlockForest >(bfs, setup.cellsPerBlock[0], setup.cellsPerBlock[1], setup.cellsPerBlock[2]); blocks->createCellBoundingBoxes(); + auto finalDomain = blocks->getDomain(); const StorageSpecification_T StorageSpec = StorageSpecification_T(); // Creating fields uint_t step = 0; WALBERLA_LOG_INFO_ON_ROOT(++step << ". Allocating data") IDs ids; - ids.pdfField = lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, uint_c(1), field::fzyx); + ids.pdfField = lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, setup.numGhostLayers, field::fzyx); auto allocator = make_shared< gpu::HostFieldAllocator<macroFieldType> >(); // use pinned memory allocator for faster CPU-GPU memory transfers - ids.velocityField = field::addToStorage< VelocityField_T >(blocks, "vel", macroFieldType(0.0), field::fzyx, uint_c(1), allocator); - ids.meanVelocityField = field::addToStorage< VelocityField_T >(blocks, "mean velocity", macroFieldType(0.0), field::fzyx, uint_c(1), allocator); - ids.densityField = field::addToStorage< ScalarField_T >(blocks, "density", macroFieldType(1.0), field::fzyx, uint_c(1), allocator); - ids.flagField = field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field"); + ids.velocityField = field::addToStorage< VelocityField_T >(blocks, "vel", macroFieldType(0.0), field::fzyx, setup.numGhostLayers, allocator); + ids.meanVelocityField = field::addToStorage< VelocityField_T >(blocks, "mean velocity", macroFieldType(0.0), field::fzyx, setup.numGhostLayers, allocator); + ids.densityField = field::addToStorage< ScalarField_T >(blocks, "density", macroFieldType(1.0), field::fzyx, setup.numGhostLayers, allocator); + ids.flagField = field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field", setup.numGhostLayers); ids.pdfFieldGPU = lbm_generated::addGPUPdfFieldToStorage< PdfField_T >(blocks, ids.pdfField, StorageSpec, "pdfs on GPU", true); ids.velocityFieldGPU = gpu::addGPUFieldToStorage< VelocityField_T >(blocks, ids.velocityField, "velocity on GPU", true); ids.meanVelocityFieldGPU = gpu::addGPUFieldToStorage< VelocityField_T >(blocks, ids.meanVelocityField, "mean velocity on GPU", true); ids.densityFieldGPU = gpu::addGPUFieldToStorage< ScalarField_T >(blocks, ids.densityField, "velocity on GPU", true); - std::function< Vector3< uint_t > (const shared_ptr< StructuredBlockStorage >&, IBlock * const) > getInflowSliceSize = inflowSliceSize; - ids.velocityFieldInflowSlice = field::addToStorage< VelocityField_T >(blocks, "velocity Inflow", getInflowSliceSize, macroFieldType(0.0), field::fzyx, uint_c(1)); + ids.velocityFieldInflowSlice = field::addToStorage< VelocityField_T >(blocks, "velocity Inflow", getInflowSliceSize, macroFieldType(0.0), field::fzyx, setup.numGhostLayers); ids.velocityFieldInflowSliceGPU = gpu::addGPUFieldToStorage< VelocityField_T >(blocks, ids.velocityFieldInflowSlice, "velocity Inflow on GPU", true); const Cell innerOuterSplit = Cell(parameters.getParameter< Vector3<cell_idx_t> >("innerOuterSplit", Vector3<cell_idx_t>(1, 1, 1))); @@ -253,8 +278,19 @@ int main(int argc, char** argv){ WelfordSweep_T welfordSweep(ids.meanVelocityFieldGPU, ids.velocityFieldGPU, real_c(0.0)); WALBERLA_LOG_INFO_ON_ROOT(++step << ". Initialising data") + + if(setup.initialiseWithlatticeVelocity){ + for (auto& block : *blocks){ + auto velocity = block.getData< VelocityField_T >(ids.velocityField); + WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(velocity, + velocity->get(x, y, z, 0) = setup.latticeVelocity; + ) + } + } + gpu::fieldCpy< gpu::GPUField< VelocityField_T::value_type >, VelocityField_T >(blocks, ids.velocityFieldGPU, ids.velocityField); + for (auto& block : *blocks){ - sweepCollection.initialise(&block); + sweepCollection.initialise(&block, cell_idx_c(setup.numGhostLayers - 1)); auto velocityInflow = block.getData< VelocityField_T >(ids.velocityFieldInflowSlice); WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(velocityInflow, @@ -267,18 +303,19 @@ int main(int argc, char** argv){ auto inflow = std::make_shared<Inflow>(blocks, setup, ids); auto probe = std::make_shared<Probe>(blocks, setup, ids, config->getOneBlock("Probes")); +/* const real_t lenght = 8.0; const real_t res = 3.0; const real_t start = 0.0; - -/* const uint_t nx = uint_c(std::floor(lenght / (setup.dxC * res))); for(uint_t i = 0; i < nx; ++i){ probe->addPoint(Vector3<real_t>(start + real_c(i) * (setup.dxC * res), 0.5, 0.5 )); } */ + probe->initialise(); + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// LB SWEEPS AND BOUNDARY HANDLING /// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -287,11 +324,12 @@ int main(int argc, char** argv){ WALBERLA_LOG_INFO_ON_ROOT(++step << ". Setting boundary conditions") for (auto& block : *blocks){ auto flagField = block.getData< FlagField_T >(ids.flagField); - setup.inflowFlag = flagField->registerFlag(setup.inflowUID); - setup.outflowFlag = flagField->registerFlag(setup.outflowUID); - setup.obstacleFlag = flagField->registerFlag(setup.obstacleUID); - setup.noSlipFlag = flagField->registerFlag(setup.noSlipUID); - setup.fluidFlag = flagField->registerFlag(setup.fluidUID); + setup.inflowFlag = flagField->registerFlag(setup.inflowUID); + setup.turbulentInflowFlag = flagField->registerFlag(setup.turbulentInflowUID); + setup.outflowFlag = flagField->registerFlag(setup.outflowUID); + setup.obstacleFlag = flagField->registerFlag(setup.obstacleUID); + setup.noSlipFlag = flagField->registerFlag(setup.noSlipUID); + setup.fluidFlag = flagField->registerFlag(setup.fluidUID); } @@ -302,6 +340,7 @@ int main(int argc, char** argv){ initGrid(blocks, ids.flagField, setup); } + if(config->getNumBlocks("RoughWall") > 0){ WALBERLA_LOG_INFO_ON_ROOT(++step << ". Using RoughWall setup") auto wallParameters = config->getOneBlock("RoughWall"); @@ -319,10 +358,9 @@ int main(int argc, char** argv){ } - geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, ids.flagField, setup.fluidUID); - // BoundaryCollection_T boundaryCollection(blocks, ids.flagField, ids.pdfFieldGPU, setup.fluidUID, ids.velocityFieldInflowSliceGPU, setup.drivingForce); - BoundaryCollection_T boundaryCollection(blocks, ids.flagField, ids.pdfFieldGPU, setup.fluidUID, ids.velocityFieldInflowSliceGPU, wallDistanceFunctor, ids.pdfField); - // BoundaryCollection_T boundaryCollection(blocks, ids.flagField, ids.pdfFieldGPU, setup.fluidUID, ids.pdfField); + geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, ids.flagField, setup.fluidUID, cell_idx_c(0)); + BoundaryCollection_T boundaryCollection(blocks, ids.flagField, ids.pdfFieldGPU, setup.fluidUID, + ids.velocityFieldInflowSliceGPU, setup.latticeVelocity, wallDistanceFunctor, ids.pdfField); if(config->getNumBlocks("TPMS") > 0 && loggingParameters.getParameter< bool >("writeSurfaceMeshOfTPMS", false) ){ WALBERLA_LOG_INFO_ON_ROOT(++step << ". Writing TPMS as surface file") @@ -354,6 +392,7 @@ int main(int argc, char** argv){ lbm_generated::BasicRecursiveTimeStepGPU< GPUPdfField_T, SweepCollection_T, BoundaryCollection_T > LBMMeshRefinement(blocks, ids.pdfFieldGPU, sweepCollection, boundaryCollection, nonUniformCommunication, nonUniformPackInfo); + // timeloop.addFuncBeforeTimeStep(LBMMeshRefinement, "Refinement timestep"); LBMMeshRefinement.addRefinementToTimeLoop(timeloop, uint_c(0)); shared_ptr< DragEvaluation > evaluation; @@ -361,19 +400,57 @@ int main(int argc, char** argv){ WALBERLA_LOG_INFO_ON_ROOT(++step << ". Setup Drag Evaluation") evaluation = make_shared<DragEvaluation>(blocks, boundaryCollection, ids, setup, config); LBMMeshRefinement.addPostBoundaryHandlingBlockFunction(evaluation->forceCalculationFunctor()); + } + + + const AABB sliceXY(finalDomain.xMin(), finalDomain.yMin(), finalDomain.center()[2] - setup.dxF, + finalDomain.xMax(), finalDomain.yMax(), finalDomain.center()[2] + setup.dxF); +/* + CellInterval globalCellInterval; + blocks->getCellBBFromAABB( globalCellInterval, sliceXY, uint_c(0) ); + + std::map<IBlock*, CellInterval> sliceCellIntervals; + + for (auto& block : *blocks){ + CellInterval cellBB; + blocks->getCellBBFromAABB( cellBB, sliceXY, blocks->getLevel(block) ); + blocks->transformGlobalToBlockLocalCellInterval( cellBB, block ); + + Cell maxCell(cell_idx_c(0), cell_idx_c(0), cell_idx_c(0)); + Cell minCell(cell_idx_c( blocks->getNumberOfXCells( block ) ), cell_idx_c( blocks->getNumberOfYCells( block ) ), cell_idx_c( blocks->getNumberOfZCells( block ) )); + + for( cell_idx_t z = cell_idx_c(0); z != cell_idx_c( blocks->getNumberOfZCells( block ) ); ++z ){ + for( cell_idx_t y = cell_idx_c(0); y != cell_idx_c( blocks->getNumberOfYCells( block ) ); ++y ){ + for( cell_idx_t x = cell_idx_c(0); x != cell_idx_c( blocks->getNumberOfXCells( block ) ); ++x ){ + if( cellBB.contains(x,y,z) ){ + if(Cell(x, y, z) < minCell){ + minCell = Cell(x, y, z); + } + if(!(Cell(x, y, z) < maxCell)){ + maxCell = Cell(x, y, z); + } + } + } + } + } + CellInterval localCellInterval(minCell, maxCell); + if(!localCellInterval.empty()){ + sliceCellIntervals[&block] = localCellInterval; + } } - timeloop.add() << BeforeFunction([&]() - { - for (auto& block : *blocks){ - sweepCollection.calculateMacroscopicParameters(&block); - } - const uint_t velCtr = uint_c(welfordSweep.getCounter()); - welfordSweep.setCounter(welfordSweep.getCounter() + real_c(1.0)); - }) << Sweep(welfordSweep.getSweep(), "welford sweep"); + { + for (auto const& sliceCellInterval : sliceCellIntervals){ + sweepCollection.calculateMacroscopicParametersCellInterval(sliceCellInterval.first, sliceCellInterval.second); + } + + const uint_t velCtr = uint_c(welfordSweep.getCounter()); + welfordSweep.setCounter(welfordSweep.getCounter() + real_c(1.0)); + }) << Sweep(welfordSweep.getSweepOnCellInterval(blocks, globalCellInterval), "welford sweep"); +*/ /*SweepTimeloop timeloop(blocks->getBlockStorage(), setup.timesteps); timeloop.add() << BeforeFunction(communication.getCommunicateFunctor(), "communication") @@ -394,7 +471,6 @@ int main(int argc, char** argv){ const uint_t initialWriteCallsToSkip = VTKWriter.getParameter< uint_t >("initialWriteCallsToSkip", uint_c(0.0)); const real_t sliceThickness = VTKWriter.getParameter< real_t >("sliceThickness", real_c(1.0)); - auto finalDomain = blocks->getDomain(); if (vtkWriteFrequency > 0){ const std::string vtkFolder = "VTKPorousMediaRE_" + std::to_string(uint64_c(setup.reynoldsNumber)); auto vtkOutput = @@ -404,16 +480,19 @@ int main(int argc, char** argv){ vtkOutput->setInitialWriteCallsToSkip(initialWriteCallsToSkip); vtkOutput->addBeforeFunction([&]() { - /* for (auto& block : *blocks){ sweepCollection.calculateMacroscopicParameters(&block); } - */ + + + // for (auto const& sliceCellInterval : sliceCellIntervals){ + // sweepCollection.calculateMacroscopicParametersCellInterval(sliceCellInterval.first, sliceCellInterval.second); + // } #if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT) gpu::fieldCpy< VelocityField_T, gpu::GPUField< VelocityField_T::value_type > >(blocks, ids.velocityField, ids.velocityFieldGPU); - gpu::fieldCpy< VelocityField_T, gpu::GPUField< VelocityField_T::value_type > >(blocks, ids.meanVelocityField, ids.meanVelocityFieldGPU); - gpu::fieldCpy< ScalarField_T, gpu::GPUField< ScalarField_T::value_type > >(blocks, ids.densityField, ids.densityFieldGPU); + // gpu::fieldCpy< VelocityField_T, gpu::GPUField< VelocityField_T::value_type > >(blocks, ids.meanVelocityField, ids.meanVelocityFieldGPU); + // gpu::fieldCpy< ScalarField_T, gpu::GPUField< ScalarField_T::value_type > >(blocks, ids.densityField, ids.densityFieldGPU); #endif }); @@ -425,8 +504,8 @@ int main(int argc, char** argv){ if (VTKWriter.getParameter< bool >("writeOnlySlice", true)){ - const AABB sliceXY(finalDomain.xMin(), finalDomain.yMin(), finalDomain.center()[2] - sliceThickness * setup.dxF, - finalDomain.xMax(), finalDomain.yMax(), finalDomain.center()[2] + sliceThickness * setup.dxF); + // const AABB sliceXY(finalDomain.xMin(), finalDomain.yMin(), finalDomain.center()[2] - sliceThickness * setup.dxF, + // finalDomain.xMax(), finalDomain.yMax(), finalDomain.center()[2] + sliceThickness * setup.dxF); vtkOutput->addCellInclusionFilter(vtk::AABBCellFilter(sliceXY)); if (VTKWriter.getParameter< bool >("writeXZSlice", false)){ diff --git a/apps/showcases/PorousMediaGPU/PorousMediaGPU.py b/apps/showcases/PorousMediaGPU/PorousMediaGPU.py index 6138be652..009f0742b 100644 --- a/apps/showcases/PorousMediaGPU/PorousMediaGPU.py +++ b/apps/showcases/PorousMediaGPU/PorousMediaGPU.py @@ -12,6 +12,7 @@ from lbmpy.flow_statistics import welford_assignments from pystencils_walberla import CodeGeneration, generate_info_header, generate_sweep from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator +from lbmpy_walberla import RefinementScaling info_header = """ #pragma once @@ -23,11 +24,13 @@ std::map<std::string, std::string> infoMap{{{{"stencil", "{stencil}"}}, omega = sp.symbols("omega") inlet_velocity = sp.symbols("u_x") -force_vector = tuple([sp.Symbol("F_x"), 0.0, 0.0]) +fx = sp.Symbol("Fx") +force_vector = tuple([fx, 0.0, 0.0]) max_threads = 256 - + with CodeGeneration() as ctx: target = Target.GPU if ctx.gpu else Target.CPU + name_prefix = "PorousMediaGPU" sweep_params = {'block_size': (128, 1, 1)} if ctx.gpu else {} # The application is meant to be compiled with double precision. For single precision, the pdf_dtype can be switched @@ -40,7 +43,7 @@ with CodeGeneration() as ctx: q = stencil.Q dim = stencil.D - streaming_pattern = 'aa' + streaming_pattern = 'esopull' pdfs = fields(f"pdfs({stencil.Q}): {pdf_dtype}[3D]", layout='fzyx') velocity_field, density_field = fields(f"velocity({dim}), density(1) : {dtype}[{dim}D]", layout='fzyx') @@ -48,23 +51,20 @@ with CodeGeneration() as ctx: macroscopic_fields = {'density': density_field, 'velocity': velocity_field} - # method_enum = Method.CUMULANT - method_enum = Method.SRT + # method_enum = Method.CENTRAL_MOMENT + method_enum = Method.CUMULANT lbm_config = LBMConfig( method=method_enum, stencil=stencil, relaxation_rate=omega, compressible=True, - force_model=ForceModel.GUO, force=force_vector, # subgrid_scale_model=SubgridScaleModel.QR, fourth_order_correction=0.01 if method_enum == Method.CUMULANT and stencil.Q == 27 else False, - field_name='pdfs', streaming_pattern=streaming_pattern ) - lbm_opt = LBMOptimisation(cse_global=False, cse_pdfs=False, field_layout="fzyx", - symbolic_field=pdfs) + lbm_opt = LBMOptimisation(cse_global=False, cse_pdfs=False, field_layout="fzyx") collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt) if lbm_config.method == Method.CUMULANT: @@ -80,8 +80,13 @@ with CodeGeneration() as ctx: boundary_object=quadratic_bounce_back, field_data_type=pdf_dtype) + + turbulentubb = lbm_boundary_generator(class_name='TurbulentInflow', flag_uid='TurbulentInflow', + boundary_object=UBB(velocity=velocity_field.center_vector, density=1.0, data_type=dtype, dim=dim), + field_data_type=pdf_dtype) + ubb = lbm_boundary_generator(class_name='UBB', flag_uid='UBB', - boundary_object=UBB(velocity=velocity_field.center_vector, density=1.0, data_type=dtype, dim=dim), + boundary_object=UBB(velocity=(inlet_velocity, 0.0, 0.0), density=1.0, data_type=dtype, dim=dim), field_data_type=pdf_dtype) # outflow_boundary = ExtrapolationOutflow(stencil[4], lb_method, data_type=pdf_dtype, density=1.0) @@ -94,13 +99,17 @@ with CodeGeneration() as ctx: # boundary_object=FixedDensity(1.0), # field_data_type=pdf_dtype) - generate_lbm_package(ctx, name="PorousMediaGPU", collision_rule=collision_rule, + refinement_scaling = RefinementScaling() + refinement_scaling.add_standard_relaxation_rate_scaling(omega) + refinement_scaling.add_force_scaling(fx) + + generate_lbm_package(ctx, name=name_prefix, collision_rule=collision_rule, lbm_config=lbm_config, lbm_optimisation=lbm_opt, - nonuniform=True, boundaries=[no_slip, no_slip_interpolated, ubb, outflow], + nonuniform=True, refinement_scaling=refinement_scaling, boundaries=[no_slip, no_slip_interpolated, ubb, turbulentubb, outflow], macroscopic_fields=macroscopic_fields, gpu_indexing_params=sweep_params, target=target, data_type=dtype, pdfs_data_type=pdf_dtype, max_threads=max_threads) - + welford_update = welford_assignments(field=velocity_field, mean_field=mean_velocity) generate_sweep(ctx, "Welford", welford_update, target=target) @@ -108,7 +117,7 @@ with CodeGeneration() as ctx: 'ScalarField_T': density_field} # Info header containing correct template definitions for stencil and field - generate_info_header(ctx, 'PorousMediaGPUInfoHeader', + generate_info_header(ctx, f'{name_prefix}InfoHeader', field_typedefs=field_typedefs) infoHeaderParams = { @@ -117,4 +126,4 @@ with CodeGeneration() as ctx: 'collision_operator': lbm_config.method.name.lower(), } - ctx.write_file("PorousMediaGPUStaticDefines.h", info_header.format(**infoHeaderParams)) \ No newline at end of file + ctx.write_file(f"{name_prefix}StaticDefines.h", info_header.format(**infoHeaderParams)) \ No newline at end of file diff --git a/apps/showcases/PorousMediaGPU/Setup.h b/apps/showcases/PorousMediaGPU/Setup.h index a0a87b5fa..13fe8a56a 100644 --- a/apps/showcases/PorousMediaGPU/Setup.h +++ b/apps/showcases/PorousMediaGPU/Setup.h @@ -23,6 +23,8 @@ #include "core/DataTypes.h" #include "core/math/Vector3.h" +#include "lbm_generated/refinement/RefinementScaling.h" + #include <string> #include "Types.h" @@ -59,9 +61,10 @@ struct Setup WALBERLA_CHECK_FLOAT_EQUAL_EPSILON(coarseMeshSize, domainSize[1] / real_c(rootBlocks[1] * cellsPerBlock[1]), real_c(1e-12), "Resulting resolution is not equal in x and y direxction. This is not supported"); WALBERLA_CHECK_FLOAT_EQUAL_EPSILON(coarseMeshSize, domainSize[2] / real_c(rootBlocks[2] * cellsPerBlock[2]), real_c(1e-12), "Resulting resolution is not equal in x and z direxction. This is not supported"); - referenceLength = domainSize[1]; - reynoldsNumber = parameters.getParameter< real_t >("reynoldsNumber"); - latticeVelocity = parameters.getParameter< real_t >("latticeVelocity"); + referenceLength = domainSize[1]; + reynoldsNumber = parameters.getParameter< real_t >("reynoldsNumber"); + latticeVelocity = parameters.getParameter< real_t >("latticeVelocity"); + initialiseWithlatticeVelocity = parameters.getParameter< bool >("initialiseWithlatticeVelocity"); useGrid = turbulence.getParameter< bool >("useGrid"); turbulentInflow = turbulence.getParameter< bool >("turbulentInflow"); @@ -71,19 +74,23 @@ struct Setup uc = turbulence.getParameter< Vector3< real_t > >("U_c"); if(config->getNumBlocks("RoughWall") > 0){ - kinViscosity = parameters.getParameter< real_t >("viscosity"); + kinViscosity = parameters.getParameter< real_t >("viscosity"); + const real_t bulkReynoldsNumber = parameters.getParameter< real_t >("bulkReynoldsNumber"); referenceLength *= real_c(0.5); - - referenceVelocity = (reynoldsNumber * kinViscosity) / referenceLength; - dt = latticeVelocity / referenceVelocity * coarseMeshSize; - viscosity = (kinViscosity * dt) / (coarseMeshSize * coarseMeshSize); + const real_t bulkVelocity = (bulkReynoldsNumber * kinViscosity) / referenceLength; + dt = latticeVelocity / bulkVelocity * coarseMeshSize; + viscosity = (kinViscosity * dt) / (coarseMeshSize * coarseMeshSize); const real_t latticeReferenceLength = referenceLength / coarseMeshSize; + + referenceVelocity = (reynoldsNumber * kinViscosity) / referenceLength; const real_t speedOfSound = real_c(real_c(1.0) / std::sqrt( real_c(3.0) )); machNumber = latticeVelocity / speedOfSound; - // viscosity = real_c((latticeVelocity * latticeReferenceLength) / reynoldsNumber); omega = real_c(real_c(1.0) / (real_c(3.0) * viscosity + real_c(0.5))); - drivingForce = calculateDrivingForce(latticeVelocity, latticeReferenceLength); + + const real_t frictionVelocity = (reynoldsNumber * viscosity) / latticeReferenceLength; + + drivingForce = calculateDrivingForce(frictionVelocity, latticeReferenceLength); } else{ referenceVelocity = parameters.getParameter< real_t >("referenceVelocity"); @@ -98,6 +105,8 @@ struct Setup kinViscosity = viscosity * coarseMeshSize * coarseMeshSize / dt; } + omegaF = lbm_generated::relaxationRateScaling(omega, refinementLevels); + rho = real_c(1.0); dxC = coarseMeshSize; dxF = real_c(coarseMeshSize) / real_c( 1 << refinementLevels ); @@ -115,11 +124,12 @@ struct Setup streamingPattern = infoMap["streamingPattern"]; collisionModel = infoMap["collisionOperator"]; - fluidUID = FlagUID("Fluid"); - noSlipUID = FlagUID("NoSlip"); - obstacleUID = FlagUID("Obstacle"); - inflowUID = FlagUID("UBB"); - outflowUID = FlagUID("Outflow"); + fluidUID = FlagUID("Fluid"); + noSlipUID = FlagUID("NoSlip"); + obstacleUID = FlagUID("Obstacle"); + inflowUID = FlagUID("UBB"); + turbulentInflowUID = FlagUID("TurbulentInflow"); + outflowUID = FlagUID("Outflow"); writeSetupForestAndReturn = logging.getParameter< bool >("writeSetupForestAndReturn", false); remainingTimeLoggerFrequency = logging.getParameter< real_t >("remainingTimeLoggerFrequency", 3.0); @@ -141,6 +151,7 @@ struct Setup real_t referenceVelocity; real_t referenceLength; real_t latticeVelocity; + bool initialiseWithlatticeVelocity; bool useGrid; bool turbulentInflow; @@ -155,6 +166,7 @@ struct Setup real_t viscosity; real_t kinViscosity; real_t omega; + real_t omegaF; real_t rho; real_t dxC; real_t dxF; @@ -176,9 +188,11 @@ struct Setup FlagUID noSlipUID; FlagUID obstacleUID; FlagUID inflowUID; + FlagUID turbulentInflowUID; FlagUID outflowUID; uint8_t inflowFlag; + uint8_t turbulentInflowFlag; uint8_t outflowFlag; uint8_t noSlipFlag; uint8_t obstacleFlag; diff --git a/apps/showcases/PorousMediaGPU/TPMS.prm b/apps/showcases/PorousMediaGPU/TPMS.prm index b7ee4728e..019bd4890 100644 --- a/apps/showcases/PorousMediaGPU/TPMS.prm +++ b/apps/showcases/PorousMediaGPU/TPMS.prm @@ -5,7 +5,7 @@ Parameters referenceVelocity 1.0; referenceLength 1.0; latticeVelocity 0.025; - initialiseWithInletVelocity true; + initialiseWithlatticeVelocity true; timesteps 100001; @@ -54,7 +54,7 @@ DomainSetup Boundaries { - Border { direction W; walldistance -1; flag UBB; } + Border { direction W; walldistance -1; flag TurbulentInflow; } Border { direction E; walldistance -1; flag Outflow; } } diff --git a/apps/showcases/PorousMediaGPU/roughWall.prm b/apps/showcases/PorousMediaGPU/roughWall.prm index c54b0bc6b..23e69c1e1 100644 --- a/apps/showcases/PorousMediaGPU/roughWall.prm +++ b/apps/showcases/PorousMediaGPU/roughWall.prm @@ -1,10 +1,12 @@ Parameters { reynoldsNumber 400; + bulkReynoldsNumber 4466; viscosity 1.48e-5; latticeVelocity 0.05; + initialiseWithlatticeVelocity true; - timesteps 5001; + timesteps 800001; processMemoryLimit 512; // MiB innerOuterSplit <1, 1, 1>; @@ -31,14 +33,12 @@ RoughWall DomainSetup { - // domainSize < 6.9, 3.2, 12 >; - // cellsPerBlock < 69, 32, 120 >; + domainSize < 34.6, 40, 4 >; + cellsPerBlock < 346, 40, 40 >; - domainSize < 55.4, 40, 12 >; - cellsPerBlock < 554, 400, 120 >; - blocks < 1, 1, 1 >; - periodic < true, false, false >; - refinementLevels 0; + blocks < 1, 10, 1 >; + periodic < true, false, true >; + refinementLevels 2; numberProcesses 1; // This is for load balancing, overwritten if more than one proc is used blockForestFilestem porousMediaBlockForest; } @@ -60,12 +60,12 @@ StabilityChecker VTKWriter { - vtkWriteFrequency 5000; + vtkWriteFrequency 1000; vtkGhostLayers 0; velocity true; writeVelocityAsMagnitude true; density false; - meanVelocity true; + meanVelocity false; flag false; writeOnlySlice true; sliceThickness 1; @@ -73,7 +73,7 @@ VTKWriter amrFileFormat false; oneFilePerProcess false; samplingResolution -1; - initialWriteCallsToSkip 0; + initialWriteCallsToSkip 500000; } Logging @@ -94,6 +94,6 @@ Probes DragEvaluation { - evaluationFrequency 1; + evaluationFrequency 40; filename drag.h5; } \ No newline at end of file diff --git a/python/lbmpy_walberla/packing_kernels.py b/python/lbmpy_walberla/packing_kernels.py index af7be15cc..862a6cdaa 100644 --- a/python/lbmpy_walberla/packing_kernels.py +++ b/python/lbmpy_walberla/packing_kernels.py @@ -103,9 +103,9 @@ class PackingKernelsCodegen: self.accessors = [get_accessor(streaming_pattern, t) for t in get_timesteps(streaming_pattern)] self.mask_field = fields(f'mask : uint32 [{self.dim}D]', layout=src_field.layout) - # self.block_wise = True - # if not self.inplace or not self.config.target == Target.GPU: - self.block_wise = False + self.block_wise = True + if not self.inplace or not self.config.target == Target.GPU: + self.block_wise = False self.index = TypedSymbol("index", dtype=BasicType(np.int64)) self.index_shape = TypedSymbol("_size_0", dtype=BasicType(np.int64)) diff --git a/python/lbmpy_walberla/sweep_collection.py b/python/lbmpy_walberla/sweep_collection.py index 164dd94d3..a54fa088c 100644 --- a/python/lbmpy_walberla/sweep_collection.py +++ b/python/lbmpy_walberla/sweep_collection.py @@ -158,20 +158,6 @@ def generate_lbm_sweep_collection(ctx, class_name: str, collision_rule: LbmColli ctx.write_file(f"{class_name}.{source_extension}", source) -class RefinementScaling: - def __init__(self): - self.scaling_info = [] - - def add_standard_relaxation_rate_scaling(self, viscosity_relaxation_rate): - self.add_scaling(viscosity_relaxation_rate) - - def add_scaling(self, parameter): - if isinstance(parameter, sp.Symbol): - self.scaling_info.append(parameter.name) - else: - raise ValueError("Only pure symbols allowed") - - def lbm_kernel_family(class_name, kernel_name, collision_rule, streaming_pattern, src_field, dst_field, config: CreateKernelConfig): diff --git a/python/lbmpy_walberla/walberla_lbm_package.py b/python/lbmpy_walberla/walberla_lbm_package.py index 7465a45bb..25da6c610 100644 --- a/python/lbmpy_walberla/walberla_lbm_package.py +++ b/python/lbmpy_walberla/walberla_lbm_package.py @@ -1,20 +1,25 @@ from typing import Callable, List, Dict from pystencils import Target, Field +from sympy import Symbol from lbmpy.creationfunctions import LbmCollisionRule, LBMConfig, LBMOptimisation +from lbmpy.relaxationrates import get_shear_relaxation_rate from pystencils_walberla.cmake_integration import CodeGenerationContext from lbmpy_walberla.boundary_collection import generate_boundary_collection from lbmpy_walberla.storage_specification import generate_lbm_storage_specification -from lbmpy_walberla.sweep_collection import generate_lbm_sweep_collection, RefinementScaling +from lbmpy_walberla.sweep_collection import generate_lbm_sweep_collection + +from lbmpy_walberla import RefinementScaling def generate_lbm_package(ctx: CodeGenerationContext, name: str, collision_rule: LbmCollisionRule, lbm_config: LBMConfig, lbm_optimisation: LBMOptimisation, - nonuniform: bool = False, boundaries: List[Callable] = None, + nonuniform: bool = False, refinement_scaling: RefinementScaling = None, + boundaries: List[Callable] = None, macroscopic_fields: Dict[str, Field] = None, target: Target = Target.CPU, data_type=None, pdfs_data_type=None, @@ -33,13 +38,23 @@ def generate_lbm_package(ctx: CodeGenerationContext, name: str, data_type=pdfs_data_type, cpu_openmp=cpu_openmp) - if nonuniform: - omega = lbm_config.relaxation_rate + if refinement_scaling is None: refinement_scaling = RefinementScaling() - refinement_scaling.add_standard_relaxation_rate_scaling(omega) + + if nonuniform: + if not refinement_scaling.scaling_info: + omega = get_shear_relaxation_rate(method) + refinement_scaling.add_standard_relaxation_rate_scaling(omega) + + if lbm_config.force: + for f_i in lbm_config.force: + if f_i != 0: + assert isinstance(f_i, Symbol) + refinement_scaling.add_force_scaling(f_i) else: refinement_scaling = None + generate_lbm_sweep_collection(ctx, f'{name}SweepCollection', collision_rule, lbm_config=lbm_config, lbm_optimisation=lbm_optimisation, refinement_scaling=refinement_scaling, diff --git a/python/pystencils_walberla/jinja_filters.py b/python/pystencils_walberla/jinja_filters.py index 22ae0ecea..b1c61b589 100644 --- a/python/pystencils_walberla/jinja_filters.py +++ b/python/pystencils_walberla/jinja_filters.py @@ -5,6 +5,7 @@ except ImportError: from jinja2 import contextfilter as jinja2_context_decorator from collections.abc import Iterable +import re import sympy as sp from pystencils import Target, Backend @@ -45,10 +46,7 @@ standard_parameter_registration = """ for (uint_t level = 0; level < blocks->getNumberOfLevels(); level++) {{ const {dtype} level_scale_factor = {dtype}(uint_t(1) << level); - const {dtype} one = {dtype}(1.0); - const {dtype} half = {dtype}(0.5); - - {name}Vector.push_back( {dtype}({name} / (level_scale_factor * (-{name} * half + one) + {name} * half)) ); + {function_rules} }} """ @@ -263,11 +261,17 @@ def generate_refs_for_kernel_parameters(kernel_info, prefix, parameters_to_ignor symbols.difference_update(pointer_symbols) type_information = {p.symbol.name: p.symbol.dtype for p in kernel_info.parameters if not p.is_field_parameter} result = [] - registered_parameters = [] if not parameter_registration else parameter_registration.scaling_info + + if parameter_registration is None: + registered_parameters = [] + else: + registered_parameters = [scaling[1] for scaling in parameter_registration.scaling_info] + for s in symbols: if s in registered_parameters: dtype = type_information[s].c_name if not level_known: + level_known = True result.append("const uint_t level = block->getBlockStorage().getLevel(*block);") result.append(f"{dtype} & {s} = {s}Vector[level];") else: @@ -487,7 +491,9 @@ def generate_constructor_initializer_list(kernel_infos, parameters_to_ignore=Non # Then free parameters if parameter_registration is not None: - parameters_to_skip.extend(parameter_registration.scaling_info) + for scaling in parameter_registration.scaling_info: + parameters_to_skip.append(scaling[1]) + for kernel_info in kernel_infos: for param in kernel_info.parameters: @@ -589,12 +595,18 @@ def generate_members(ctx, kernel_infos, parameters_to_ignore=None, only_fields=F result.append(f"BlockDataID {param.field_name}ID;") params_to_skip.append(param.field_name) + if parameter_registration is None: + scaling_identifiers = [] + else: + scaling_identifiers = [scaling[1] for scaling in parameter_registration.scaling_info] + + for kernel_info in kernel_infos: for param in kernel_info.parameters: if only_fields and not param.is_field_parameter: continue if not param.is_field_parameter and param.symbol.name not in params_to_skip: - if parameter_registration and param.symbol.name in parameter_registration.scaling_info: + if param.symbol.name in scaling_identifiers: result.append(f"std::vector<{param.symbol.dtype}> {param.symbol.name}Vector;") else: result.append(f"{param.symbol.dtype} {param.symbol.name}_;") @@ -719,10 +731,13 @@ def generate_getter(kernel_infos, parameter_registration=None): params_to_skip = [] result = [] + scaling_identifiers = [] + if parameter_registration: + scaling_identifiers = [scaling[1] for scaling in parameter_registration.scaling_info] for kernel_info in kernel_infos: for param in kernel_info.parameters: if not param.is_field_parameter and param.symbol.name not in params_to_skip: - if parameter_registration and param.symbol.name in parameter_registration.scaling_info: + if param.symbol.name in scaling_identifiers: continue dtype = param.symbol.dtype name = param.symbol.name @@ -739,10 +754,13 @@ def generate_setter(kernel_infos, parameter_registration=None): params_to_skip = [] result = [] + scaling_identifiers = [] + if parameter_registration: + scaling_identifiers = [scaling[1] for scaling in parameter_registration.scaling_info] for kernel_info in kernel_infos: for param in kernel_info.parameters: if not param.is_field_parameter and param.symbol.name not in params_to_skip: - if parameter_registration and param.symbol.name in parameter_registration.scaling_info: + if param.symbol.name in scaling_identifiers: continue dtype = param.symbol.dtype name = param.symbol.name @@ -761,34 +779,70 @@ def generate_parameter_registration(kernel_infos, parameter_registration): params_to_skip = [] result = [] + + scaling_identifiers = [scaling[1] for scaling in parameter_registration.scaling_info] + function_rules = [] + for kernel_info in kernel_infos: for param in kernel_info.parameters: if not param.is_field_parameter and param.symbol.name not in params_to_skip: - if param.symbol.name in parameter_registration.scaling_info: - result.append(standard_parameter_registration.format(dtype=param.symbol.dtype, - name=param.symbol.name)) - params_to_skip.append(param.symbol.name) + if param.symbol.name in scaling_identifiers: + name = param.symbol.name + dtype = param.symbol.dtype + + idx = scaling_identifiers.index(param.symbol.name) + lambda_expr = parameter_registration.scaling_info[idx][2] + raw_scaling = convert_to_raw_form(name, lambda_expr) + function_rules.append(raw_scaling.format(dtype=dtype)) + params_to_skip.append(name) + + function_rules_baked = "\n ".join(function_rules) + result.append(standard_parameter_registration.format(dtype=dtype, function_rules=function_rules_baked)) + return "\n".join(result) +def convert_to_raw_form(name, expression): + expr_str = str(expression) + + # Extract the body of the lambda expression + start_idx = expr_str.find(":") + 1 + body = expr_str[start_idx:].strip() + + # Replace numeric constants with the {dtype}(number) format + body = re.sub(r'(?<!\w)(\d+(\.\d*)?|\.\d+)(?!\w)', lambda match: f'{{dtype}}({match.group(1)})', body) + + return f"{name}Vector.push_back({{dtype}}({body}));" + + def generate_constructor(kernel_infos, parameter_registration): if parameter_registration is None: return "" if not isinstance(kernel_infos, Iterable): kernel_infos = [kernel_infos] - params_to_skip = [] result = [] + params_to_skip = [] + + scaling_identifiers = [scaling[1] for scaling in parameter_registration.scaling_info] + function_rules = [] + for kernel_info in kernel_infos: for param in kernel_info.parameters: if not param.is_field_parameter and param.symbol.name not in params_to_skip: - if param.symbol.name in parameter_registration.scaling_info: + if param.symbol.name in scaling_identifiers: name = param.symbol.name dtype = param.symbol.dtype - result.append(standard_parameter_registration.format(dtype=dtype, name=name)) - params_to_skip.append(name) + idx = scaling_identifiers.index(param.symbol.name) + lambda_expr = parameter_registration.scaling_info[idx][2] + raw_scaling = convert_to_raw_form(name, lambda_expr) + function_rules.append(raw_scaling.format(dtype=dtype)) + params_to_skip.append(name) + + function_rules_baked = "\n ".join(function_rules) + result.append(standard_parameter_registration.format(dtype=dtype, function_rules=function_rules_baked)) return "\n".join(result) diff --git a/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.impl.h b/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.impl.h index 0327b9b52..054b492b9 100644 --- a/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.impl.h +++ b/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.impl.h @@ -63,7 +63,7 @@ void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollectio // 2.1 Collision and Ghost-Layer Propagation for(auto b: blocks_[level]){ - ghostLayerPropagation(b); // GL-Propagation first without swapping arrays... + ghostLayerPropagation(b, nullptr); // GL-Propagation first without swapping arrays... sweepCollection_.streamCollide(b); // then Stream-Collide on interior, and swap arrays } diff --git a/src/lbm_generated/refinement/RefinementScaling.h b/src/lbm_generated/refinement/RefinementScaling.h index abee51e71..2a2e1ff54 100644 --- a/src/lbm_generated/refinement/RefinementScaling.h +++ b/src/lbm_generated/refinement/RefinementScaling.h @@ -21,6 +21,7 @@ #pragma once #include "core/DataTypes.h" +#include "core/math/Vector3.h" namespace walberla::lbm_generated { @@ -28,10 +29,22 @@ namespace walberla::lbm_generated inline real_t relaxationRateScaling( real_t relaxationRate, uint_t refinementLevel ) { const real_t levelScaleFactor = real_c(uint_c(1) << refinementLevel); - const real_t one = real_c(1.0); - const real_t half = real_c(0.5); + const real_t one = real_c(1.0); + const real_t half = real_c(0.5); return real_c(relaxationRate / (levelScaleFactor * (-relaxationRate * half + one) + relaxationRate * half)); } +inline Vector3< real_t > levelDependentBodyForce(const Vector3< real_t > & forceDensity, const uint_t refinementLevel) +{ + const real_t levelScaleFactor = real_c(uint_c(1) << refinementLevel); + return forceDensity * ( real_c(1.0) / levelScaleFactor ); +} + +inline real_t levelDependentBodyForce(const real_t & forceDensity, const uint_t refinementLevel) +{ + const real_t levelScaleFactor = real_c(uint_c(1) << refinementLevel); + return forceDensity * ( real_c(1.0) / levelScaleFactor ); +} + } // namespace walberla::lbm_generated \ No newline at end of file -- GitLab