Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
No results found
Show changes
Commits on Source (154)
Showing
with 758 additions and 809 deletions
...@@ -10,6 +10,7 @@ qrc_* ...@@ -10,6 +10,7 @@ qrc_*
# CLion indexing # CLion indexing
*.uuid *.uuid
.fleet
# Generated files # Generated files
...@@ -32,11 +33,13 @@ qrc_* ...@@ -32,11 +33,13 @@ qrc_*
# Visual Studio Code # Visual Studio Code
/.vscode /.vscode
# Zed
/.cache*
# CLion # CLion
*.idea *.idea
*.clion* *.clion*
# QtCreator # QtCreator
CMakeLists.txt.user.* CMakeLists.txt.user.*
......
...@@ -22,7 +22,6 @@ if ( WALBERLA_BUILD_WITH_PYTHON ) ...@@ -22,7 +22,6 @@ if ( WALBERLA_BUILD_WITH_PYTHON )
add_subdirectory( FieldCommunication ) add_subdirectory( FieldCommunication )
if ( WALBERLA_BUILD_WITH_CODEGEN ) if ( WALBERLA_BUILD_WITH_CODEGEN )
add_subdirectory( FlowAroundSphereCodeGen )
add_subdirectory( UniformGridCPU ) add_subdirectory( UniformGridCPU )
add_subdirectory( PhaseFieldAllenCahn ) add_subdirectory( PhaseFieldAllenCahn )
add_subdirectory( NonUniformGridCPU ) add_subdirectory( NonUniformGridCPU )
......
...@@ -988,7 +988,7 @@ int main( int argc, char **argv ) ...@@ -988,7 +988,7 @@ int main( int argc, char **argv )
} }
logging::Logging::printHeaderOnStream(); logging::Logging::printHeaderOnStream();
//WALBERLA_ROOT_SECTION() { logging::Logging::instance()->setLogLevel( logging::Logging::PROGRESS ); } WALBERLA_ROOT_SECTION() { logging::Logging::instance()->setLogLevel( logging::Logging::INFO ); }
#ifdef _OPENMP #ifdef _OPENMP
if( std::getenv( "OMP_NUM_THREADS" ) == nullptr ) if( std::getenv( "OMP_NUM_THREADS" ) == nullptr )
......
waLBerla_link_files_to_builddir( "*.py" )
waLBerla_generate_target_from_python(NAME FlowAroundSphereGenerated
FILE FlowAroundSphereCodeGen.py
OUT_FILES FlowAroundSphereCodeGen_LbSweep.${CODEGEN_FILE_SUFFIX} FlowAroundSphereCodeGen_LbSweep.h
FlowAroundSphereCodeGen_MacroSetter.${CODEGEN_FILE_SUFFIX} FlowAroundSphereCodeGen_MacroSetter.h
FlowAroundSphereCodeGen_UBB.${CODEGEN_FILE_SUFFIX} FlowAroundSphereCodeGen_UBB.h
FlowAroundSphereCodeGen_NoSlip.${CODEGEN_FILE_SUFFIX} FlowAroundSphereCodeGen_NoSlip.h
FlowAroundSphereCodeGen_Outflow.${CODEGEN_FILE_SUFFIX} FlowAroundSphereCodeGen_Outflow.h
FlowAroundSphereCodeGen_PackInfoEven.${CODEGEN_FILE_SUFFIX} FlowAroundSphereCodeGen_PackInfoEven.h
FlowAroundSphereCodeGen_PackInfoOdd.${CODEGEN_FILE_SUFFIX} FlowAroundSphereCodeGen_PackInfoOdd.h
FlowAroundSphereCodeGen_InfoHeader.h)
if (WALBERLA_BUILD_WITH_GPU_SUPPORT )
waLBerla_add_executable( NAME FlowAroundSphereCodeGen FILE FlowAroundSphereCodeGen.cpp
DEPENDS blockforest boundary core gpu domain_decomposition field geometry python_coupling timeloop vtk FlowAroundSphereGenerated)
else ()
waLBerla_add_executable( NAME FlowAroundSphereCodeGen FILE FlowAroundSphereCodeGen.cpp
DEPENDS blockforest boundary core domain_decomposition field geometry python_coupling timeloop vtk FlowAroundSphereGenerated)
endif (WALBERLA_BUILD_WITH_GPU_SUPPORT )
\ No newline at end of file
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file FlowAroundSphereCodeGen.cpp
//! \author Frederik Hennig <frederik.hennig@fau.de>
//! \author Markus Holzer <markus.holzer@fau.de>
//
//======================================================================================================================
#include "blockforest/all.h"
#include "core/all.h"
#include "domain_decomposition/all.h"
#include "field/all.h"
#include "geometry/all.h"
#include "lbm/inplace_streaming/TimestepTracker.h"
#include "lbm/vtk/QCriterion.h"
#include "python_coupling/CreateConfig.h"
#include "python_coupling/PythonCallback.h"
#include "timeloop/all.h"
#if defined(WALBERLA_BUILD_WITH_CUDA)
# include "gpu/AddGPUFieldToStorage.h"
# include "gpu/DeviceSelectMPI.h"
# include "gpu/HostFieldAllocator.h"
# include "gpu/NVTX.h"
# include "gpu/ParallelStreams.h"
# include "gpu/communication/GPUPackInfo.h"
# include "gpu/communication/UniformGPUScheme.h"
#endif
// CodeGen includes
#include "FlowAroundSphereCodeGen_InfoHeader.h"
namespace walberla
{
typedef lbm::FlowAroundSphereCodeGen_PackInfoEven PackInfoEven_T;
typedef lbm::FlowAroundSphereCodeGen_PackInfoOdd PackInfoOdd_T;
typedef walberla::uint8_t flag_t;
typedef FlagField< flag_t > FlagField_T;
#if defined(WALBERLA_BUILD_WITH_CUDA)
typedef gpu::GPUField< real_t > GPUField;
#endif
using namespace std::placeholders;
auto pdfFieldAdder = [](IBlock* const block, StructuredBlockStorage* const storage) {
return new PdfField_T(storage->getNumberOfXCells(*block), storage->getNumberOfYCells(*block),
storage->getNumberOfZCells(*block), uint_t(1), field::fzyx,
make_shared< field::AllocateAligned< real_t, 64 > >());
};
auto VelocityCallback = [](const Cell& pos, const shared_ptr< StructuredBlockForest >& SbF, IBlock& block,
real_t inflow_velocity, const bool constant_inflow = true) {
if (constant_inflow)
{
Vector3< real_t > result(inflow_velocity, real_c(0.0), real_c(0.0));
return result;
}
else
{
Cell globalCell;
CellInterval domain = SbF->getDomainCellBB();
auto h_y = real_c(domain.ySize());
auto h_z = real_c(domain.zSize());
SbF->transformBlockLocalToGlobalCell(globalCell, block, pos);
auto y1 = real_c(globalCell[1] - (h_y / 2.0 - 0.5));
auto z1 = real_c(globalCell[2] - (h_z / 2.0 - 0.5));
real_t u = (inflow_velocity * real_c(16.0)) / (h_y * h_y * h_z * h_z) * (h_y / real_c(2.0) - y1) *
(h_y / real_c(2.0) + y1) * (h_z / real_c(2.0) - z1) * (h_z / real_c(2.0) + z1);
Vector3< real_t > result(u, 0.0, 0.0);
return result;
}
};
class AlternatingBeforeFunction
{
public:
typedef std::function< void() > BeforeFunction;
AlternatingBeforeFunction(BeforeFunction evenFunc, BeforeFunction oddFunc,
std::shared_ptr< lbm::TimestepTracker >& tracker)
: tracker_(tracker), funcs_{ evenFunc, oddFunc } {};
void operator()() { funcs_[tracker_->getCounter()](); }
private:
std::shared_ptr< lbm::TimestepTracker > tracker_;
std::vector< BeforeFunction > funcs_;
};
class Filter
{
public:
explicit Filter(Vector3< uint_t > numberOfCells) : numberOfCells_(numberOfCells) {}
void operator()(const IBlock& /*block*/) {}
bool operator()(const cell_idx_t x, const cell_idx_t y, const cell_idx_t z) const
{
return x >= -1 && x <= cell_idx_t(numberOfCells_[0]) && y >= -1 && y <= cell_idx_t(numberOfCells_[1]) &&
z >= -1 && z <= cell_idx_t(numberOfCells_[2]);
}
private:
Vector3< uint_t > numberOfCells_;
};
using FluidFilter_T = Filter;
int main(int argc, char** argv)
{
walberla::Environment walberlaEnv(argc, argv);
#if defined(WALBERLA_BUILD_WITH_CUDA)
gpu::selectDeviceBasedOnMpiRank();
#endif
for (auto cfg = python_coupling::configBegin(argc, argv); cfg != python_coupling::configEnd(); ++cfg)
{
WALBERLA_MPI_WORLD_BARRIER()
auto config = *cfg;
logging::configureLogging(config);
auto blocks = blockforest::createUniformBlockGridFromConfig(config);
// read parameters
Vector3< uint_t > cellsPerBlock =
config->getBlock("DomainSetup").getParameter< Vector3< uint_t > >("cellsPerBlock");
auto parameters = config->getOneBlock("Parameters");
const uint_t timesteps = parameters.getParameter< uint_t >("timesteps", uint_c(10));
const real_t omega = parameters.getParameter< real_t >("omega", real_c(1.9));
const real_t u_max = parameters.getParameter< real_t >("u_max", real_c(0.05));
const real_t reynolds_number = parameters.getParameter< real_t >("reynolds_number", real_c(1000.0));
const uint_t diameter_sphere = parameters.getParameter< uint_t >("diameter_sphere", uint_t(5));
const bool constant_inflow = parameters.getParameter< bool >("constant_inflow", true);
const real_t remainingTimeLoggerFrequency =
parameters.getParameter< real_t >("remainingTimeLoggerFrequency", real_c(3.0)); // in seconds
// create fields
BlockDataID pdfFieldID = blocks->addStructuredBlockData< PdfField_T >(pdfFieldAdder, "PDFs");
BlockDataID velFieldID = field::addToStorage< VelocityField_T >(blocks, "velocity", real_c(0.0), field::fzyx);
BlockDataID densityFieldID = field::addToStorage< ScalarField_T >(blocks, "density", real_c(0.0), field::fzyx);
#if defined(WALBERLA_BUILD_WITH_CUDA)
BlockDataID pdfFieldIDGPU = gpu::addGPUFieldToStorage< PdfField_T >(blocks, pdfFieldID, "PDFs on GPU", true);
BlockDataID velFieldIDGPU =
gpu::addGPUFieldToStorage< VelocityField_T >(blocks, velFieldID, "velocity on GPU", true);
BlockDataID densityFieldIDGPU =
gpu::addGPUFieldToStorage< ScalarField_T >(blocks, densityFieldID, "density on GPU", true);
#endif
BlockDataID flagFieldId = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field");
// initialise all PDFs
#if defined(WALBERLA_BUILD_WITH_CUDA)
pystencils::FlowAroundSphereCodeGen_MacroSetter setterSweep(pdfFieldIDGPU, velFieldIDGPU);
for (auto& block : *blocks)
setterSweep(&block);
gpu::fieldCpy< PdfField_T, GPUField >(blocks, pdfFieldID, pdfFieldIDGPU);
#else
pystencils::FlowAroundSphereCodeGen_MacroSetter setterSweep(pdfFieldID, velFieldID);
for (auto& block : *blocks)
setterSweep(&block);
#endif
// Create communication
#if defined(WALBERLA_BUILD_WITH_CUDA)
// This way of using alternating pack infos is temporary and will soon be replaced
// by something more straight-forward
gpu::communication::UniformGPUScheme< Stencil_T > comEven(blocks, false);
comEven.addPackInfo(make_shared< PackInfoEven_T >(pdfFieldIDGPU));
auto evenComm = std::function< void() >([&]() { comEven.communicate(); });
gpu::communication::UniformGPUScheme< Stencil_T > comODD(blocks, false);
comODD.addPackInfo(make_shared< PackInfoOdd_T >(pdfFieldIDGPU));
auto oddComm = std::function< void() >([&]() { comODD.communicate(); });
#else
blockforest::communication::UniformBufferedScheme< Stencil_T > evenComm(blocks);
evenComm.addPackInfo(make_shared< PackInfoEven_T >(pdfFieldID));
blockforest::communication::UniformBufferedScheme< Stencil_T > oddComm(blocks);
oddComm.addPackInfo(make_shared< PackInfoOdd_T >(pdfFieldID));
#endif
// create and initialize boundary handling
const FlagUID fluidFlagUID("Fluid");
auto boundariesConfig = config->getOneBlock("Boundaries");
std::function< Vector3< real_t >(const Cell&, const shared_ptr< StructuredBlockForest >&, IBlock&) >
velocity_initialisation = std::bind(VelocityCallback, _1, _2, _3, u_max, constant_inflow);
#if defined(WALBERLA_BUILD_WITH_CUDA)
lbm::FlowAroundSphereCodeGen_UBB ubb(blocks, pdfFieldIDGPU, velocity_initialisation);
lbm::FlowAroundSphereCodeGen_NoSlip noSlip(blocks, pdfFieldIDGPU);
lbm::FlowAroundSphereCodeGen_Outflow outflow(blocks, pdfFieldIDGPU, pdfFieldID);
lbm::FlowAroundSphereCodeGen_LbSweep lbSweep(densityFieldIDGPU, pdfFieldIDGPU, velFieldIDGPU, omega);
#else
lbm::FlowAroundSphereCodeGen_UBB ubb(blocks, pdfFieldID, velocity_initialisation);
lbm::FlowAroundSphereCodeGen_NoSlip noSlip(blocks, pdfFieldID);
lbm::FlowAroundSphereCodeGen_Outflow outflow(blocks, pdfFieldID);
lbm::FlowAroundSphereCodeGen_LbSweep lbSweep(densityFieldID, pdfFieldID, velFieldID, omega);
#endif
geometry::initBoundaryHandling< FlagField_T >(*blocks, flagFieldId, boundariesConfig);
geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldId, fluidFlagUID);
ubb.fillFromFlagField< FlagField_T >(blocks, flagFieldId, FlagUID("UBB"), fluidFlagUID);
noSlip.fillFromFlagField< FlagField_T >(blocks, flagFieldId, FlagUID("NoSlip"), fluidFlagUID);
outflow.fillFromFlagField< FlagField_T >(blocks, flagFieldId, FlagUID("Outflow"), fluidFlagUID);
// create time loop
SweepTimeloop timeloop(blocks->getBlockStorage(), timesteps);
// Timestep Tracking and Sweeps
auto tracker = make_shared< lbm::TimestepTracker >(0);
AlternatingBeforeFunction communication(evenComm, oddComm, tracker);
// add LBM sweep and communication to time loop
timeloop.add() << BeforeFunction(communication, "communication") << Sweep(ubb.getSweep(tracker), "ubb boundary");
timeloop.add() << Sweep(outflow.getSweep(tracker), "outflow boundary");
timeloop.add() << Sweep(noSlip.getSweep(tracker), "noSlip boundary");
timeloop.add() << BeforeFunction(tracker->getAdvancementFunction(), "Timestep Advancement")
<< Sweep(lbSweep.getSweep(tracker), "LB update rule");
// LBM stability check
timeloop.addFuncAfterTimeStep(makeSharedFunctor(field::makeStabilityChecker< PdfField_T, FlagField_T >(
config, blocks, pdfFieldID, flagFieldId, fluidFlagUID)),
"LBM stability check");
// log remaining time
timeloop.addFuncAfterTimeStep(
timing::RemainingTimeLogger(timeloop.getNrOfTimeSteps(), remainingTimeLoggerFrequency),
"remaining time logger");
// add VTK output to time loop
uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0);
if (vtkWriteFrequency > 0)
{
auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out",
"simulation_step", false, true, true, false, 0);
#if defined(WALBERLA_BUILD_WITH_CUDA)
vtkOutput->addBeforeFunction([&]() {
gpu::fieldCpy< VelocityField_T, GPUField >(blocks, velFieldID, velFieldIDGPU);
gpu::fieldCpy< ScalarField_T, GPUField >(blocks, densityFieldID, densityFieldIDGPU);
});
#endif
auto velWriter = make_shared< field::VTKWriter< VelocityField_T > >(velFieldID, "velocity");
auto densityWriter = make_shared< field::VTKWriter< ScalarField_T > >(densityFieldID, "density");
FluidFilter_T filter(cellsPerBlock);
auto QCriterionWriter = make_shared< lbm::QCriterionVTKWriter< VelocityField_T, FluidFilter_T > >(
blocks, filter, velFieldID, "Q-Criterion");
vtkOutput->addCellDataWriter(velWriter);
vtkOutput->addCellDataWriter(densityWriter);
vtkOutput->addCellDataWriter(QCriterionWriter);
timeloop.addFuncAfterTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
}
WcTimer simTimer;
WALBERLA_LOG_INFO_ON_ROOT("Simulating flow around sphere:"
"\n timesteps: "
<< timesteps << "\n reynolds number: " << reynolds_number
<< "\n relaxation rate: " << omega << "\n maximum inflow velocity: " << u_max
<< "\n diameter_sphere: " << diameter_sphere)
simTimer.start();
timeloop.run();
simTimer.end();
WALBERLA_LOG_INFO_ON_ROOT("Simulation finished")
auto time = real_c(simTimer.last());
auto nrOfCells = real_c(cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2]);
auto mlupsPerProcess = nrOfCells * real_c(timesteps) / time * 1e-6;
WALBERLA_LOG_RESULT_ON_ROOT("MLUPS per process " << mlupsPerProcess)
WALBERLA_LOG_RESULT_ON_ROOT("Time per time step " << time / real_c(timesteps))
}
return EXIT_SUCCESS;
}
} // namespace walberla
int main(int argc, char** argv) { walberla::main(argc, argv); }
from pystencils import Target
from pystencils.field import fields
from lbmpy import LBMConfig, LBMOptimisation, LBStencil, Method, Stencil
from lbmpy.advanced_streaming.utility import get_timesteps
from lbmpy.macroscopic_value_kernels import macroscopic_values_setter
from lbmpy.creationfunctions import create_lb_collision_rule
from lbmpy.boundaries import NoSlip, UBB, ExtrapolationOutflow
from pystencils_walberla import CodeGeneration, generate_sweep, generate_info_header
from lbmpy_walberla.additional_data_handler import UBBAdditionalDataHandler, OutflowAdditionalDataHandler
from lbmpy_walberla import generate_lb_pack_info
from lbmpy_walberla import generate_alternating_lbm_sweep, generate_alternating_lbm_boundary
import sympy as sp
with CodeGeneration() as ctx:
data_type = "float64" if ctx.double_accuracy else "float32"
stencil = LBStencil(Stencil.D3Q27)
q = stencil.Q
dim = stencil.D
streaming_pattern = 'esotwist'
timesteps = get_timesteps(streaming_pattern)
pdfs, velocity_field, density_field = fields(f"pdfs({q}), velocity({dim}), density(1) : {data_type}[{dim}D]",
layout='fzyx')
omega = sp.Symbol("omega")
u_max = sp.Symbol("u_max")
output = {
'density': density_field,
'velocity': velocity_field
}
lbm_config = LBMConfig(stencil=stencil, method=Method.CUMULANT, compressible=True,
relaxation_rate=omega, galilean_correction=True,
field_name='pdfs', streaming_pattern=streaming_pattern, output=output)
lbm_optimisation = LBMOptimisation(symbolic_field=pdfs, cse_global=False, cse_pdfs=False)
collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_optimisation)
lb_method = collision_rule.method
# getter & setter
setter_assignments = macroscopic_values_setter(lb_method, velocity=velocity_field.center_vector,
pdfs=pdfs, density=1.0,
streaming_pattern=streaming_pattern,
previous_timestep=timesteps[0])
setter_assignments = setter_assignments.new_without_unused_subexpressions()
# opt = {'instruction_set': 'sse', 'assume_aligned': True, 'nontemporal': False, 'assume_inner_stride_one': True}
stencil_typedefs = {'Stencil_T': stencil}
field_typedefs = {'PdfField_T': pdfs,
'VelocityField_T': velocity_field,
'ScalarField_T': density_field}
if ctx.cuda:
target = Target.GPU
else:
target = Target.CPU
# sweeps
generate_alternating_lbm_sweep(ctx, 'FlowAroundSphereCodeGen_LbSweep',
collision_rule, lbm_config=lbm_config, lbm_optimisation=lbm_optimisation,
target=target)
generate_sweep(ctx, 'FlowAroundSphereCodeGen_MacroSetter', setter_assignments, target=target)
# boundaries
ubb = UBB(lambda *args: None, dim=dim, data_type=data_type)
ubb_data_handler = UBBAdditionalDataHandler(stencil, ubb)
outflow = ExtrapolationOutflow(stencil[4], lb_method, streaming_pattern=streaming_pattern, data_type=data_type)
outflow_data_handler = OutflowAdditionalDataHandler(stencil, outflow, target=target)
generate_alternating_lbm_boundary(ctx, 'FlowAroundSphereCodeGen_UBB', ubb, lb_method,
target=target, streaming_pattern=streaming_pattern,
additional_data_handler=ubb_data_handler,
layout='fzyx')
generate_alternating_lbm_boundary(ctx, 'FlowAroundSphereCodeGen_NoSlip', NoSlip(), lb_method,
target=target, streaming_pattern=streaming_pattern,
layout='fzyx')
generate_alternating_lbm_boundary(ctx, 'FlowAroundSphereCodeGen_Outflow', outflow, lb_method,
target=target, streaming_pattern=streaming_pattern,
additional_data_handler=outflow_data_handler,
layout='fzyx')
# communication
generate_lb_pack_info(ctx, 'FlowAroundSphereCodeGen_PackInfo', stencil, pdfs,
streaming_pattern=streaming_pattern, always_generate_separate_classes=True, target=target)
# Info header containing correct template definitions for stencil and field
generate_info_header(ctx, 'FlowAroundSphereCodeGen_InfoHeader',
stencil_typedefs=stencil_typedefs, field_typedefs=field_typedefs)
import waLBerla as wlb
from lbmpy.relaxationrates import relaxation_rate_from_lattice_viscosity
class Scenario:
def __init__(self):
self.timesteps = 10
self.vtkWriteFrequency = 100
self.cells = (64, 32, 32)
self.blocks = (1, 1, 1)
self.periodic = (0, 0, 0)
self.constant_inflow = True
self.diameter_sphere = min(self.cells) // 2
self.u_max = 0.1
self.reynolds_number = 1000000
kinematic_viscosity = (self.diameter_sphere * self.u_max) / self.reynolds_number
self.omega = relaxation_rate_from_lattice_viscosity(kinematic_viscosity)
self.total_cells = (self.cells[0] * self.blocks[0],
self.cells[1] * self.blocks[1],
self.cells[2] * self.blocks[2])
@wlb.member_callback
def config(self):
return {
'DomainSetup': {
'blocks': self.blocks,
'cellsPerBlock': self.cells,
'periodic': self.periodic,
'oneBlockPerProcess': True
},
'Parameters': {
'timesteps': self.timesteps,
'vtkWriteFrequency': self.vtkWriteFrequency,
'omega': self.omega,
'u_max': self.u_max,
'reynolds_number': self.reynolds_number,
'diameter_sphere': self.diameter_sphere,
'constant_inflow': self.constant_inflow
},
'Boundaries': {
'Border': [
{'direction': 'N', 'walldistance': -1, 'flag': 'NoSlip'},
{'direction': 'S', 'walldistance': -1, 'flag': 'NoSlip'},
{'direction': 'W', 'walldistance': -1, 'flag': 'UBB'},
{'direction': 'E', 'walldistance': -1, 'flag': 'Outflow'},
{'direction': 'T', 'walldistance': -1, 'flag': 'NoSlip'},
{'direction': 'B', 'walldistance': -1, 'flag': 'NoSlip'},
],
'Body': [
{'shape': "sphere",
'midpoint': (int(0.40 * self.total_cells[0]), self.total_cells[1] // 2, self.total_cells[2] // 2),
'radius': self.diameter_sphere // 2,
'flag': 'NoSlip'}
],
},
}
scenarios = wlb.ScenarioManager()
scenarios.add(Scenario())
...@@ -11,11 +11,6 @@ waLBerla_generate_target_from_python(NAME NonUniformGridCPUGenerated ...@@ -11,11 +11,6 @@ waLBerla_generate_target_from_python(NAME NonUniformGridCPUGenerated
NonUniformGridCPUBoundaryCollection.h NonUniformGridCPUBoundaryCollection.h
NonUniformGridCPUInfoHeader.h) NonUniformGridCPUInfoHeader.h)
waLBerla_add_executable( NAME NonUniformGridGenerator
FILES NonUniformGridGenerator.cpp LdcSetup.h
DEPENDS blockforest core field python_coupling )
waLBerla_add_executable( NAME NonUniformGridCPU waLBerla_add_executable( NAME NonUniformGridCPU
FILES NonUniformGridCPU.cpp LdcSetup.h FILES NonUniformGridCPU.cpp LdcSetup.h GridGeneration.h
DEPENDS blockforest boundary core domain_decomposition field geometry lbm_generated python_coupling timeloop vtk NonUniformGridCPUGenerated ) DEPENDS blockforest boundary core domain_decomposition field geometry lbm_generated python_coupling timeloop vtk NonUniformGridCPUGenerated )
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file GridGeneration.h
//! \author Markus Holzer <markus.holzer@fau.de>
//
//======================================================================================================================
#pragma once
#include "blockforest/Initialization.h"
#include "blockforest/SetupBlock.h"
#include "blockforest/SetupBlockForest.h"
#include "blockforest/loadbalancing/StaticCurve.h"
#include "core/Environment.h"
#include "core/logging/Initialization.h"
#include "core/timing/RemainingTimeLogger.h"
#include "core/timing/TimingPool.h"
#include <string>
#include "LdcSetup.h"
#include "NonUniformGridCPUInfoHeader.h"
using StorageSpecification_T = lbm::NonUniformGridCPUStorageSpecification;
using Stencil_T = StorageSpecification_T::Stencil;
using namespace walberla;
void createSetupBlockForest(SetupBlockForest& setupBfs,
const Config::BlockHandle& domainSetup, const Config::BlockHandle& blockForestSetup,
const bool useMPIManager=false)
{
WALBERLA_LOG_INFO_ON_ROOT("Generating SetupBlockForest...")
Vector3<real_t> domainSize = domainSetup.getParameter<Vector3<real_t> >("domainSize");
Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock");
Vector3<uint_t> rootBlocks = domainSetup.getParameter<Vector3<uint_t> >("rootBlocks");
Vector3<bool> periodic = domainSetup.getParameter<Vector3<bool> >("periodic");
const uint_t refinementDepth = blockForestSetup.getParameter< uint_t >("refinementDepth", uint_c(1));
uint_t numProcesses = blockForestSetup.getParameter< uint_t >( "numProcesses");
const std::string blockForestFilestem = blockForestSetup.getParameter< std::string > ("blockForestFilestem", "blockforest");
const bool writeVtk = blockForestSetup.getParameter< bool >("writeVtk", false);
const bool outputStatistics = blockForestSetup.getParameter< bool >("outputStatistics", false);
if(useMPIManager)
numProcesses = uint_c(mpi::MPIManager::instance()->numProcesses());
const LDC ldc(refinementDepth);
auto refSelection = ldc.refinementSelector();
setupBfs.addRefinementSelectionFunction(std::function<void(SetupBlockForest &)>(refSelection));
const AABB domain(real_t(0.0), real_t(0.0), real_t(0.0), domainSize[0], domainSize[1], domainSize[2]);
setupBfs.addWorkloadMemorySUIDAssignmentFunction(blockforest::uniformWorkloadAndMemoryAssignment);
setupBfs.init(domain, rootBlocks[0], rootBlocks[1], rootBlocks[2], periodic[0], periodic[1], periodic[2]);
setupBfs.balanceLoad(blockforest::StaticLevelwiseCurveBalanceWeighted(), numProcesses);
if(mpi::MPIManager::instance()->numProcesses() > 1)
return;
{
std::ostringstream oss;
oss << blockForestFilestem << ".bfs";
setupBfs.saveToFile(oss.str().c_str());
}
if(writeVtk){
setupBfs.writeVTKOutput(blockForestFilestem);
}
if(outputStatistics){
WALBERLA_LOG_INFO_ON_ROOT("=========================== BLOCK FOREST STATISTICS ============================");
WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << setupBfs.getNumberOfBlocks())
for (uint_t level = 0; level <= refinementDepth; level++)
{
const uint_t numberOfBlocks = setupBfs.getNumberOfBlocks(level);
WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << numberOfBlocks)
}
const real_t avgBlocksPerProc = real_c(setupBfs.getNumberOfBlocks()) / real_c(setupBfs.getNumberOfProcesses());
WALBERLA_LOG_INFO_ON_ROOT("Average blocks per process: " << avgBlocksPerProc);
const uint_t totalNumberCells = setupBfs.getNumberOfBlocks() * cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2];
const real_t averageCellsPerGPU = avgBlocksPerProc * real_c(cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2]);
const uint_t PDFsPerCell = StorageSpecification_T::inplace ? Stencil_T::Q : 2 * Stencil_T::Q;
const uint_t valuesPerCell = (PDFsPerCell + VelocityField_T::F_SIZE + ScalarField_T::F_SIZE);
const uint_t sizePerValue = sizeof(StorageSpecification_T::value_type);
const double expectedMemory = double_c(totalNumberCells * valuesPerCell * sizePerValue) * 1e-9;
const double expectedMemoryPerGPU = double_c(averageCellsPerGPU * valuesPerCell * sizePerValue) * 1e-9;
WALBERLA_LOG_INFO_ON_ROOT( "Total number of cells will be " << totalNumberCells << " fluid cells (in total on all levels)")
WALBERLA_LOG_INFO_ON_ROOT( "Expected total memory demand will be " << expectedMemory << " GB")
WALBERLA_LOG_INFO_ON_ROOT( "Average memory demand per GPU will be " << expectedMemoryPerGPU << " GB")
WALBERLA_LOG_INFO_ON_ROOT("=================================================================================");
}
}
void createBlockForest(shared_ptr< BlockForest >& bfs,
const Config::BlockHandle& domainSetup, const Config::BlockHandle& blockForestSetup)
{
if (mpi::MPIManager::instance()->numProcesses() > 1)
{
const std::string blockForestFilestem =
blockForestSetup.getParameter< std::string >("blockForestFilestem", "blockforest");
// Load structured block forest from file
std::ostringstream oss;
oss << blockForestFilestem << ".bfs";
const std::string setupBlockForestFilepath = oss.str();
std::ifstream infile(setupBlockForestFilepath.c_str());
if(!infile.good())
{
WALBERLA_LOG_WARNING_ON_ROOT("Blockforest was not created beforehand and thus needs to be created on the fly. For large simulation runs this can be a severe problem!")
SetupBlockForest setupBfs;
createSetupBlockForest(setupBfs, domainSetup, blockForestSetup, true);
bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs);
}
else
{
bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()),
setupBlockForestFilepath.c_str(), false);
}
}
else
{
SetupBlockForest setupBfs;
createSetupBlockForest(setupBfs, domainSetup, blockForestSetup);
bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs);
}
}
\ No newline at end of file
...@@ -48,14 +48,8 @@ class LDCRefinement ...@@ -48,14 +48,8 @@ class LDCRefinement
{ {
const AABB & domain = forest.getDomain(); const AABB & domain = forest.getDomain();
const real_t xSize = ( domain.xSize() / real_t(12) ) * real_c( 0.99 ); const AABB leftCorner( 0, domain.yMax() -1, 0, 1, domain.yMax() , domain.zMax() );
const real_t ySize = ( domain.ySize() / real_t(12) ) * real_c( 0.99 ); const AABB rightCorner( domain.xMax() - 1, domain.yMax() -1, 0, domain.xMax(), domain.yMax() , domain.zMax() );
const AABB leftCorner( domain.xMin(), domain.yMin(), domain.zMin(),
domain.xMin() + xSize, domain.yMin() + ySize, domain.zMax() );
const AABB rightCorner( domain.xMax() - xSize, domain.yMin(), domain.zMin(),
domain.xMax(), domain.yMin() + ySize, domain.zMax() );
for(auto & block : forest) for(auto & block : forest)
{ {
......
...@@ -38,6 +38,7 @@ ...@@ -38,6 +38,7 @@
#include <cmath> #include <cmath>
#include "GridGeneration.h"
#include "LdcSetup.h" #include "LdcSetup.h"
#include "NonUniformGridCPUInfoHeader.h" #include "NonUniformGridCPUInfoHeader.h"
#include "lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h" #include "lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h"
...@@ -77,23 +78,25 @@ int main(int argc, char** argv) ...@@ -77,23 +78,25 @@ int main(int argc, char** argv)
auto config = *cfg; auto config = *cfg;
logging::configureLogging(config); logging::configureLogging(config);
auto domainSetup = config->getOneBlock("DomainSetup");
auto blockForestSetup = config->getOneBlock("SetupBlockForest"); auto blockForestSetup = config->getOneBlock("SetupBlockForest");
const bool writeSetupForestAndReturn = blockForestSetup.getParameter< bool >("writeSetupForestAndReturn", true);
const std::string blockForestFilestem = const std::string blockForestFilestem =
blockForestSetup.getParameter< std::string >("blockForestFilestem", "blockforest"); blockForestSetup.getParameter< std::string >("blockForestFilestem", "blockforest");
const uint_t refinementDepth = blockForestSetup.getParameter< uint_t >("refinementDepth", uint_c(1)); const uint_t refinementDepth = blockForestSetup.getParameter< uint_t >("refinementDepth", uint_c(1));
auto domainSetup = config->getOneBlock("DomainSetup");
Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock"); Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock");
// Load structured block forest from file shared_ptr< BlockForest > bfs;
std::ostringstream oss; createBlockForest(bfs, domainSetup, blockForestSetup);
oss << blockForestFilestem << ".bfs";
const std::string setupBlockForestFilepath = oss.str(); if (writeSetupForestAndReturn && mpi::MPIManager::instance()->numProcesses() == 1)
{
WALBERLA_LOG_INFO_ON_ROOT("BlockForest has been created and writen to file. Returning program")
return EXIT_SUCCESS;
}
WALBERLA_LOG_INFO_ON_ROOT("Creating structured block forest...")
auto bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()),
setupBlockForestFilepath.c_str(), false);
auto blocks = auto blocks =
std::make_shared< StructuredBlockForest >(bfs, cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2]); std::make_shared< StructuredBlockForest >(bfs, cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2]);
blocks->createCellBoundingBoxes(); blocks->createCellBoundingBoxes();
...@@ -132,7 +135,7 @@ int main(int argc, char** argv) ...@@ -132,7 +135,7 @@ int main(int argc, char** argv)
SweepCollection_T sweepCollection(blocks, pdfFieldID, densityFieldID, velFieldID, omega, innerOuterSplit); SweepCollection_T sweepCollection(blocks, pdfFieldID, densityFieldID, velFieldID, omega, innerOuterSplit);
for (auto& block : *blocks) for (auto& block : *blocks)
{ {
sweepCollection.initialise(&block, 2); sweepCollection.initialise(&block, cell_idx_c(1));
} }
WALBERLA_MPI_BARRIER() WALBERLA_MPI_BARRIER()
WALBERLA_LOG_INFO_ON_ROOT("Initialisation done") WALBERLA_LOG_INFO_ON_ROOT("Initialisation done")
...@@ -173,6 +176,8 @@ int main(int argc, char** argv) ...@@ -173,6 +176,8 @@ int main(int argc, char** argv)
const uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0); const uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0);
const bool useVTKAMRWriter = parameters.getParameter< bool >("useVTKAMRWriter", false); const bool useVTKAMRWriter = parameters.getParameter< bool >("useVTKAMRWriter", false);
const bool oneFilePerProcess = parameters.getParameter< bool >("oneFilePerProcess", false); const bool oneFilePerProcess = parameters.getParameter< bool >("oneFilePerProcess", false);
auto finalDomain = blocks->getDomain();
if (vtkWriteFrequency > 0) if (vtkWriteFrequency > 0)
{ {
auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out", auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out",
...@@ -180,6 +185,12 @@ int main(int argc, char** argv) ...@@ -180,6 +185,12 @@ int main(int argc, char** argv)
auto velWriter = make_shared< field::VTKWriter< VelocityField_T, float32 > >(velFieldID, "vel"); auto velWriter = make_shared< field::VTKWriter< VelocityField_T, float32 > >(velFieldID, "vel");
vtkOutput->addCellDataWriter(velWriter); vtkOutput->addCellDataWriter(velWriter);
if (parameters.getParameter< bool >("writeOnlySlice", true)){
const AABB sliceXY(finalDomain.xMin(), finalDomain.yMin(), finalDomain.center()[2] - blocks->dz(refinementDepth),
finalDomain.xMax(), finalDomain.yMax(), finalDomain.center()[2] + blocks->dz(refinementDepth));
vtkOutput->addCellInclusionFilter(vtk::AABBCellFilter(sliceXY));
}
vtkOutput->addBeforeFunction([&]() { vtkOutput->addBeforeFunction([&]() {
for (auto& block : *blocks) for (auto& block : *blocks)
sweepCollection.calculateMacroscopicParameters(&block); sweepCollection.calculateMacroscopicParameters(&block);
...@@ -236,6 +247,8 @@ int main(int argc, char** argv) ...@@ -236,6 +247,8 @@ int main(int argc, char** argv)
pythonCallbackResults.data().exposeValue("numProcesses", performance.processes()); pythonCallbackResults.data().exposeValue("numProcesses", performance.processes());
pythonCallbackResults.data().exposeValue("numThreads", performance.threads()); pythonCallbackResults.data().exposeValue("numThreads", performance.threads());
pythonCallbackResults.data().exposeValue("numCores", performance.cores()); pythonCallbackResults.data().exposeValue("numCores", performance.cores());
pythonCallbackResults.data().exposeValue("numberOfCells", performance.numberOfCells());
pythonCallbackResults.data().exposeValue("numberOfFluidCells", performance.numberOfFluidCells());
pythonCallbackResults.data().exposeValue("mlups", performance.mlups(timesteps, time)); pythonCallbackResults.data().exposeValue("mlups", performance.mlups(timesteps, time));
pythonCallbackResults.data().exposeValue("mlupsPerCore", performance.mlupsPerCore(timesteps, time)); pythonCallbackResults.data().exposeValue("mlupsPerCore", performance.mlupsPerCore(timesteps, time));
pythonCallbackResults.data().exposeValue("mlupsPerProcess", pythonCallbackResults.data().exposeValue("mlupsPerProcess",
......
...@@ -23,17 +23,22 @@ const bool infoCsePdfs = {cse_pdfs}; ...@@ -23,17 +23,22 @@ const bool infoCsePdfs = {cse_pdfs};
with CodeGeneration() as ctx: with CodeGeneration() as ctx:
field_type = "float64" if ctx.double_accuracy else "float32" field_type = "float64" if ctx.double_accuracy else "float32"
cpu_vec = {"instruction_set": None}
streaming_pattern = 'aa' streaming_pattern = 'pull'
timesteps = get_timesteps(streaming_pattern) timesteps = get_timesteps(streaming_pattern)
stencil = LBStencil(Stencil.D3Q19) stencil = LBStencil(Stencil.D3Q27)
method_enum = Method.CUMULANT
fourth_order_correction = 0.01 if method_enum == Method.CUMULANT and stencil.Q == 27 else False
collision_setup = "cumulant-K17" if fourth_order_correction else method_enum.name.lower()
assert stencil.D == 3, "This application supports only three-dimensional stencils" assert stencil.D == 3, "This application supports only three-dimensional stencils"
pdfs, pdfs_tmp = ps.fields(f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {field_type}[3D]", layout='fzyx') pdfs, pdfs_tmp = ps.fields(f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {field_type}[3D]", layout='fzyx')
density_field, velocity_field = ps.fields(f"density, velocity(3) : {field_type}[3D]", layout='fzyx') density_field, velocity_field = ps.fields(f"density, velocity(3) : {field_type}[3D]", layout='fzyx')
macroscopic_fields = {'density': density_field, 'velocity': velocity_field} macroscopic_fields = {'density': density_field, 'velocity': velocity_field}
lbm_config = LBMConfig(stencil=stencil, method=Method.SRT, relaxation_rate=omega, compressible=True, lbm_config = LBMConfig(stencil=stencil, method=method_enum, relaxation_rate=omega, compressible=True,
fourth_order_correction=fourth_order_correction,
streaming_pattern=streaming_pattern) streaming_pattern=streaming_pattern)
lbm_opt = LBMOptimisation(cse_global=False, field_layout="fzyx") lbm_opt = LBMOptimisation(cse_global=False, field_layout="fzyx")
...@@ -50,12 +55,12 @@ with CodeGeneration() as ctx: ...@@ -50,12 +55,12 @@ with CodeGeneration() as ctx:
lbm_config=lbm_config, lbm_optimisation=lbm_opt, lbm_config=lbm_config, lbm_optimisation=lbm_opt,
nonuniform=True, boundaries=[no_slip, ubb], nonuniform=True, boundaries=[no_slip, ubb],
macroscopic_fields=macroscopic_fields, macroscopic_fields=macroscopic_fields,
target=ps.Target.CPU) target=ps.Target.CPU, cpu_vectorize_info=cpu_vec,)
infoHeaderParams = { infoHeaderParams = {
'stencil': stencil.name.lower(), 'stencil': stencil.name.lower(),
'streaming_pattern': streaming_pattern, 'streaming_pattern': streaming_pattern,
'collision_setup': lbm_config.method.name.lower(), 'collision_setup': collision_setup,
'cse_global': int(lbm_opt.cse_global), 'cse_global': int(lbm_opt.cse_global),
'cse_pdfs': int(lbm_opt.cse_pdfs), 'cse_pdfs': int(lbm_opt.cse_pdfs),
} }
......
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file NonUniformGridGenerator.cpp
//! \author Frederik Hennig <frederik.hennig@fau.de>
//
//======================================================================================================================
#include "blockforest/Initialization.h"
#include "blockforest/SetupBlock.h"
#include "blockforest/SetupBlockForest.h"
#include "blockforest/loadbalancing/StaticCurve.h"
#include "core/all.h"
#include "python_coupling/CreateConfig.h"
#include <string>
#include "LdcSetup.h"
using namespace walberla;
int main(int argc, char ** argv){
const mpi::Environment env(argc, argv);
mpi::MPIManager::instance()->useWorldComm();
if(mpi::MPIManager::instance()->numProcesses() > 1){
WALBERLA_ABORT("Commandment: Thou shalt not run thy grid generator with more than one process.");
}
for (auto cfg = python_coupling::configBegin(argc, argv); cfg != python_coupling::configEnd(); ++cfg)
{
auto config = *cfg;
auto domainSetup = config->getOneBlock("DomainSetup");
Vector3<real_t> domainSize = domainSetup.getParameter<Vector3<real_t> >("domainSize");
Vector3<uint_t> rootBlocks = domainSetup.getParameter<Vector3<uint_t> >("rootBlocks");
Vector3<bool> periodic = domainSetup.getParameter<Vector3<bool> >("periodic");
auto blockForestSetup = config->getOneBlock("SetupBlockForest");
const uint_t refinementDepth = blockForestSetup.getParameter< uint_t >("refinementDepth", uint_c(1));
const uint_t numProcesses = blockForestSetup.getParameter< uint_t >( "numProcesses");
const std::string blockForestFilestem = blockForestSetup.getParameter< std::string > ("blockForestFilestem", "blockforest");
const bool writeVtk = blockForestSetup.getParameter< bool >("writeVtk", false);
const bool outputStatistics = blockForestSetup.getParameter< bool >("outputStatistics", false);
const LDC ldc(refinementDepth);
SetupBlockForest setupBfs;
auto refSelection = ldc.refinementSelector();
setupBfs.addRefinementSelectionFunction(std::function<void(SetupBlockForest &)>(refSelection));
const AABB domain(real_t(0.0), real_t(0.0), real_t(0.0), domainSize[0], domainSize[1], domainSize[2]);
setupBfs.addWorkloadMemorySUIDAssignmentFunction(blockforest::uniformWorkloadAndMemoryAssignment);
setupBfs.init(domain, rootBlocks[0], rootBlocks[1], rootBlocks[2], periodic[0], periodic[1], periodic[2]);
setupBfs.balanceLoad(blockforest::StaticLevelwiseCurveBalanceWeighted(), numProcesses);
{
std::ostringstream oss;
oss << blockForestFilestem << ".bfs";
setupBfs.saveToFile(oss.str().c_str());
}
if(writeVtk){
setupBfs.writeVTKOutput(blockForestFilestem);
}
if(outputStatistics){
WALBERLA_LOG_INFO_ON_ROOT("=========================== BLOCK FOREST STATISTICS ============================");
WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << setupBfs.getNumberOfBlocks())
for (uint_t level = 0; level <= refinementDepth; level++)
{
const uint_t numberOfBlocks = setupBfs.getNumberOfBlocks(level);
WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << numberOfBlocks)
}
const uint_t avgBlocksPerProc = setupBfs.getNumberOfBlocks() / setupBfs.getNumberOfProcesses();
WALBERLA_LOG_INFO_ON_ROOT("Average blocks per process: " << avgBlocksPerProc);
WALBERLA_LOG_INFO_ON_ROOT("=================================================================================");
}
WALBERLA_LOG_INFO_ON_ROOT("Ending program")
}
}
import waLBerla as wlb import waLBerla as wlb
from waLBerla.tools.config import block_decomposition
from waLBerla.tools.sqlitedb import sequenceValuesToScalars, checkAndUpdateSchema, storeSingle from waLBerla.tools.sqlitedb import sequenceValuesToScalars, checkAndUpdateSchema, storeSingle
import sqlite3 import sqlite3
import os import os
import sys import sys
try:
import machinestate as ms
except ImportError:
ms = None
DB_FILE = os.environ.get('DB_FILE', "cpu_benchmark.sqlite3") DB_FILE = os.environ.get('DB_FILE', "cpu_benchmark.sqlite3")
BENCHMARK = int(os.environ.get('BENCHMARK', 0))
WeakX = int(os.environ.get('WeakX', 128))
WeakY = int(os.environ.get('WeakY', 128))
WeakZ = int(os.environ.get('WeakZ', 128))
StrongX = int(os.environ.get('StrongX', 128))
StrongY = int(os.environ.get('StrongY', 128))
StrongZ = int(os.environ.get('StrongZ', 128))
class Scenario: class Scenario:
...@@ -18,7 +33,8 @@ class Scenario: ...@@ -18,7 +33,8 @@ class Scenario:
vtk_write_frequency=0, vtk_write_frequency=0,
logger_frequency=0, logger_frequency=0,
blockforest_filestem="blockforest", blockforest_filestem="blockforest",
write_setup_vtk=False): write_setup_vtk=True,
db_file_name=None):
self.domain_size = domain_size self.domain_size = domain_size
self.root_blocks = root_blocks self.root_blocks = root_blocks
...@@ -34,6 +50,8 @@ class Scenario: ...@@ -34,6 +50,8 @@ class Scenario:
self.vtk_write_frequency = vtk_write_frequency self.vtk_write_frequency = vtk_write_frequency
self.logger_frequency = logger_frequency self.logger_frequency = logger_frequency
self.db_file_name = DB_FILE if db_file_name is None else db_file_name
self.config_dict = self.config(print_dict=False) self.config_dict = self.config(print_dict=False)
@wlb.member_callback @wlb.member_callback
...@@ -51,7 +69,8 @@ class Scenario: ...@@ -51,7 +69,8 @@ class Scenario:
'numProcesses': self.num_processes, 'numProcesses': self.num_processes,
'blockForestFilestem': self.bfs_filestem, 'blockForestFilestem': self.bfs_filestem,
'writeVtk': self.write_setup_vtk, 'writeVtk': self.write_setup_vtk,
'outputStatistics': False 'outputStatistics': True,
'writeSetupForestAndReturn': True,
}, },
'Parameters': { 'Parameters': {
'omega': 1.95, 'omega': 1.95,
...@@ -59,14 +78,15 @@ class Scenario: ...@@ -59,14 +78,15 @@ class Scenario:
'remainingTimeLoggerFrequency': self.logger_frequency, 'remainingTimeLoggerFrequency': self.logger_frequency,
'vtkWriteFrequency': self.vtk_write_frequency, 'vtkWriteFrequency': self.vtk_write_frequency,
'useVTKAMRWriter': True, 'useVTKAMRWriter': True,
'oneFilePerProcess': False 'oneFilePerProcess': False,
'writeOnlySlice': False
}, },
'Logging': { 'Logging': {
'logLevel': "info", 'logLevel': "info",
} }
} }
if (print_dict): if print_dict:
wlb.log_info_on_root("Scenario:\n" + pformat(config_dict)) wlb.log_info_on_root("Scenario:\n" + pformat(config_dict))
return config_dict return config_dict
...@@ -82,6 +102,15 @@ class Scenario: ...@@ -82,6 +102,15 @@ class Scenario:
data['compile_flags'] = wlb.build_info.compiler_flags data['compile_flags'] = wlb.build_info.compiler_flags
data['walberla_version'] = wlb.build_info.version data['walberla_version'] = wlb.build_info.version
data['build_machine'] = wlb.build_info.build_machine data['build_machine'] = wlb.build_info.build_machine
if ms:
state = ms.MachineState(extended=False, anonymous=True)
state.generate() # generate subclasses
state.update() # read information
data["MachineState"] = str(state.get())
else:
print("MachineState module is not available. MachineState was not saved")
sequenceValuesToScalars(data) sequenceValuesToScalars(data)
result = data result = data
...@@ -92,52 +121,109 @@ class Scenario: ...@@ -92,52 +121,109 @@ class Scenario:
table_name = table_name.replace("-", "_") table_name = table_name.replace("-", "_")
for num_try in range(num_tries): for num_try in range(num_tries):
try: try:
checkAndUpdateSchema(result, table_name, DB_FILE) checkAndUpdateSchema(result, table_name, self.db_file_name)
storeSingle(result, table_name, DB_FILE) storeSingle(result, table_name, self.db_file_name)
break break
except sqlite3.OperationalError as e: except sqlite3.OperationalError as e:
wlb.log_warning(f"Sqlite DB writing failed: try {num_try + 1}/{num_tries} {str(e)}") wlb.log_warning(f"Sqlite DB writing failed: try {num_try + 1}/{num_tries} {str(e)}")
def validation_run(): def weak_scaling_ldc(num_proc, uniform=False):
"""Run with full periodic shear flow or boundary scenario (ldc) to check if the code works""" wlb.log_info_on_root("Running weak scaling benchmark...")
wlb.log_info_on_root("Validation run")
domain_size = (96, 96, 96) # This benchmark must run from 16 processes onwards
cells_per_block = (32, 32, 32) if wlb.mpi.numProcesses() > 1:
num_proc = wlb.mpi.numProcesses()
if uniform:
factor = 3 * num_proc
name = "uniform"
else:
if num_proc % 16 != 0:
raise RuntimeError("Number of processes must be dividable by 16")
factor = int(num_proc // 16)
name = "nonuniform"
cells_per_block = (WeakX, WeakY, WeakZ)
domain_size = (cells_per_block[0] * 3, cells_per_block[1] * 3, cells_per_block[2] * factor)
root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)]) root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)])
scenarios = wlb.ScenarioManager() scenarios = wlb.ScenarioManager()
scenario = Scenario(domain_size=domain_size, scenario = Scenario(blockforest_filestem=f"blockforest_{name}_{num_proc}",
domain_size=domain_size,
root_blocks=root_blocks, root_blocks=root_blocks,
num_processes=1, num_processes=num_proc,
refinement_depth=1,
cells_per_block=cells_per_block, cells_per_block=cells_per_block,
timesteps=201, refinement_depth=0 if uniform else 3,
vtk_write_frequency=100, timesteps=10,
logger_frequency=5, db_file_name=f"weakScalingCPU{name}LDC.sqlite3")
write_setup_vtk=True) scenarios.add(scenario)
def strong_scaling_ldc(num_proc, uniform=False):
wlb.log_info_on_root("Running strong scaling benchmark...")
# This benchmark must run from 64 GPUs onwards
if wlb.mpi.numProcesses() > 1:
num_proc = wlb.mpi.numProcesses()
if num_proc % 64 != 0:
raise RuntimeError("Number of processes must be dividable by 64")
cells_per_block = (StrongX, StrongY, StrongZ)
if uniform:
domain_size = (cells_per_block[0] * 2, cells_per_block[1] * 2, cells_per_block[2] * 16)
name = "uniform"
else:
factor = int(num_proc / 64)
blocks64 = block_decomposition(factor)
cells_per_block = tuple([int(c / b) for c, b in zip(cells_per_block, reversed(blocks64))])
domain_size = (cells_per_block[0] * 3, cells_per_block[1] * 3, cells_per_block[2] * factor)
name = "nonuniform"
root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)])
scenarios = wlb.ScenarioManager()
scenario = Scenario(blockforest_filestem=f"blockforest_{name}_{num_proc}",
domain_size=domain_size,
root_blocks=root_blocks,
num_processes=num_proc,
cells_per_block=cells_per_block,
refinement_depth=0 if uniform else 3,
timesteps=10,
db_file_name=f"strongScalingCPU{name}LDC.sqlite3")
scenarios.add(scenario) scenarios.add(scenario)
def scaling(): def validation_run():
wlb.log_info_on_root("Running scaling benchmark...") """Run with full periodic shear flow or boundary scenario (ldc) to check if the code works"""
wlb.log_info_on_root("Validation run")
numProc = wlb.mpi.numProcesses() domain_size = (96, 96, 32)
cells_per_block = (32, 32, 32)
domain_size = (256, 256, 128 * numProc)
cells_per_block = (64, 64, 64)
root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)]) root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)])
scenarios = wlb.ScenarioManager() scenarios = wlb.ScenarioManager()
scenario = Scenario(domain_size=domain_size, scenario = Scenario(domain_size=domain_size,
root_blocks=root_blocks, root_blocks=root_blocks,
num_processes=1,
refinement_depth=3,
cells_per_block=cells_per_block, cells_per_block=cells_per_block,
refinement_depth=2, timesteps=1001,
timesteps=10) vtk_write_frequency=100,
logger_frequency=5,
write_setup_vtk=True)
scenarios.add(scenario) scenarios.add(scenario)
validation_run() if BENCHMARK == 0:
# scaling() validation_run()
elif BENCHMARK == 1:
weak_scaling_ldc(1, False)
elif BENCHMARK == 2:
strong_scaling_ldc(1, False)
else:
print(f"Invalid benchmark case {BENCHMARK}")
...@@ -11,5 +11,5 @@ waLBerla_generate_target_from_python(NAME NonUniformGridGPUGenerated ...@@ -11,5 +11,5 @@ waLBerla_generate_target_from_python(NAME NonUniformGridGPUGenerated
NonUniformGridGPUBoundaryCollection.h NonUniformGridGPUBoundaryCollection.h
NonUniformGridGPUInfoHeader.h) NonUniformGridGPUInfoHeader.h)
waLBerla_add_executable( NAME NonUniformGridGPU waLBerla_add_executable( NAME NonUniformGridGPU
FILES NonUniformGridGPU.cpp LdcSetup.h FILES NonUniformGridGPU.cpp LdcSetup.h GridGeneration.h
DEPENDS blockforest boundary core gpu domain_decomposition field geometry lbm_generated python_coupling timeloop vtk NonUniformGridGPUGenerated ) DEPENDS blockforest boundary core gpu domain_decomposition field geometry lbm_generated python_coupling timeloop vtk NonUniformGridGPUGenerated )
\ No newline at end of file
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file GridGeneration.h
//! \author Markus Holzer <markus.holzer@fau.de>
//
//======================================================================================================================
#pragma once
#include "blockforest/Initialization.h"
#include "blockforest/SetupBlock.h"
#include "blockforest/SetupBlockForest.h"
#include "blockforest/loadbalancing/StaticCurve.h"
#include "core/Environment.h"
#include "core/logging/Initialization.h"
#include "core/timing/RemainingTimeLogger.h"
#include "core/timing/TimingPool.h"
#include <string>
#include "LdcSetup.h"
#include "NonUniformGridGPUInfoHeader.h"
using StorageSpecification_T = lbm::NonUniformGridGPUStorageSpecification;
using Stencil_T = StorageSpecification_T::Stencil;
using namespace walberla;
void createSetupBlockForest(SetupBlockForest& setupBfs,
const Config::BlockHandle& domainSetup, const Config::BlockHandle& blockForestSetup,
const bool useMPIManager=false)
{
WALBERLA_LOG_INFO_ON_ROOT("Generating SetupBlockForest...")
Vector3<real_t> domainSize = domainSetup.getParameter<Vector3<real_t> >("domainSize");
Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock");
Vector3<uint_t> rootBlocks = domainSetup.getParameter<Vector3<uint_t> >("rootBlocks");
Vector3<bool> periodic = domainSetup.getParameter<Vector3<bool> >("periodic");
const uint_t refinementDepth = blockForestSetup.getParameter< uint_t >("refinementDepth", uint_c(1));
uint_t numProcesses = blockForestSetup.getParameter< uint_t >( "numProcesses");
const std::string blockForestFilestem = blockForestSetup.getParameter< std::string > ("blockForestFilestem", "blockforest");
const bool writeVtk = blockForestSetup.getParameter< bool >("writeVtk", false);
const bool outputStatistics = blockForestSetup.getParameter< bool >("outputStatistics", false);
if(useMPIManager)
numProcesses = uint_c(mpi::MPIManager::instance()->numProcesses());
const LDC ldc(refinementDepth);
auto refSelection = ldc.refinementSelector();
setupBfs.addRefinementSelectionFunction(std::function<void(SetupBlockForest &)>(refSelection));
const AABB domain(real_t(0.0), real_t(0.0), real_t(0.0), domainSize[0], domainSize[1], domainSize[2]);
setupBfs.addWorkloadMemorySUIDAssignmentFunction(blockforest::uniformWorkloadAndMemoryAssignment);
setupBfs.init(domain, rootBlocks[0], rootBlocks[1], rootBlocks[2], periodic[0], periodic[1], periodic[2]);
setupBfs.balanceLoad(blockforest::StaticLevelwiseCurveBalanceWeighted(), numProcesses);
if(mpi::MPIManager::instance()->numProcesses() > 1)
return;
{
std::ostringstream oss;
oss << blockForestFilestem << ".bfs";
setupBfs.saveToFile(oss.str().c_str());
}
if(writeVtk){
setupBfs.writeVTKOutput(blockForestFilestem);
}
if(outputStatistics){
WALBERLA_LOG_INFO_ON_ROOT("=========================== BLOCK FOREST STATISTICS ============================");
WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << setupBfs.getNumberOfBlocks())
for (uint_t level = 0; level <= refinementDepth; level++){
const uint_t numberOfBlocks = setupBfs.getNumberOfBlocks(level);
WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << numberOfBlocks)
}
const real_t avgBlocksPerProc = real_c(setupBfs.getNumberOfBlocks()) / real_c(setupBfs.getNumberOfProcesses());
WALBERLA_LOG_INFO_ON_ROOT("Average blocks per process: " << avgBlocksPerProc);
const uint_t totalNumberCells = setupBfs.getNumberOfBlocks() * cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2];
const real_t averageCellsPerGPU = avgBlocksPerProc * real_c(cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2]);
const uint_t PDFsPerCell = StorageSpecification_T::inplace ? Stencil_T::Q : 2 * Stencil_T::Q;
const uint_t valuesPerCell = (PDFsPerCell + VelocityField_T::F_SIZE + ScalarField_T::F_SIZE);
const uint_t sizePerValue = sizeof(StorageSpecification_T::value_type);
const double expectedMemory = double_c(totalNumberCells * valuesPerCell * sizePerValue) * 1e-9;
const double expectedMemoryPerGPU = double_c(averageCellsPerGPU * valuesPerCell * sizePerValue) * 1e-9;
WALBERLA_LOG_INFO_ON_ROOT( "Total number of cells will be " << totalNumberCells << " fluid cells (in total on all levels)")
WALBERLA_LOG_INFO_ON_ROOT( "Expected total memory demand will be " << expectedMemory << " GB")
WALBERLA_LOG_INFO_ON_ROOT( "Average memory demand per GPU will be " << expectedMemoryPerGPU << " GB")
WALBERLA_LOG_INFO_ON_ROOT("=================================================================================");
}
}
void createBlockForest(shared_ptr< BlockForest >& bfs,
const Config::BlockHandle& domainSetup, const Config::BlockHandle& blockForestSetup)
{
if (mpi::MPIManager::instance()->numProcesses() > 1){
const std::string blockForestFilestem =
blockForestSetup.getParameter< std::string >("blockForestFilestem", "blockforest");
// Load structured block forest from file
std::ostringstream oss;
oss << blockForestFilestem << ".bfs";
const std::string setupBlockForestFilepath = oss.str();
std::ifstream infile(setupBlockForestFilepath.c_str());
if(!infile.good()){
WALBERLA_LOG_WARNING_ON_ROOT("Blockforest was not created beforehand and thus needs to be created on the fly. For large simulation runs this can be a severe problem!")
SetupBlockForest setupBfs;
createSetupBlockForest(setupBfs, domainSetup, blockForestSetup, true);
bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs);
}
else{
bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()),
setupBlockForestFilepath.c_str(), false);
}
}
else{
SetupBlockForest setupBfs;
createSetupBlockForest(setupBfs, domainSetup, blockForestSetup);
bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs);
}
}
\ No newline at end of file
...@@ -31,7 +31,9 @@ ...@@ -31,7 +31,9 @@
#include "field/FlagUID.h" #include "field/FlagUID.h"
using namespace walberla; using namespace walberla;
using RefinementSelectionFunctor = SetupBlockForest::RefinementSelectionFunction; using RefinementSelectionFunctor = SetupBlockForest::RefinementSelectionFunction;
using FlagField_T = FlagField< uint8_t >; using FlagField_T = FlagField< uint8_t >;
class LDCRefinement class LDCRefinement
...@@ -46,14 +48,8 @@ class LDCRefinement ...@@ -46,14 +48,8 @@ class LDCRefinement
{ {
const AABB & domain = forest.getDomain(); const AABB & domain = forest.getDomain();
const real_t xSize = ( domain.xSize() / real_t(12) ) * real_c( 0.99 ); const AABB leftCorner( 0, domain.yMax() -1, 0, 1, domain.yMax() , domain.zMax() );
const real_t ySize = ( domain.ySize() / real_t(12) ) * real_c( 0.99 ); const AABB rightCorner( domain.xMax() - 1, domain.yMax() -1, 0, domain.xMax(), domain.yMax() , domain.zMax() );
const AABB leftCorner( domain.xMin(), domain.yMin(), domain.zMin(),
domain.xMin() + xSize, domain.yMin() + ySize, domain.zMax() );
const AABB rightCorner( domain.xMax() - xSize, domain.yMin(), domain.zMin(),
domain.xMax(), domain.yMin() + ySize, domain.zMax() );
for(auto & block : forest) for(auto & block : forest)
{ {
...@@ -99,8 +95,7 @@ class LDC ...@@ -99,8 +95,7 @@ class LDC
Cell globalCell(localCell); Cell globalCell(localCell);
sbfs.transformBlockLocalToGlobalCell(globalCell, b); sbfs.transformBlockLocalToGlobalCell(globalCell, b);
if (globalCell.y() >= cell_idx_c(sbfs.getNumberOfYCells(level))) { flagField->addFlag(localCell, ubbFlag); } if (globalCell.y() >= cell_idx_c(sbfs.getNumberOfYCells(level))) { flagField->addFlag(localCell, ubbFlag); }
else if (globalCell.z() < 0 || globalCell.y() < 0 || globalCell.x() < 0 || else if (globalCell.y() < 0 || globalCell.x() < 0 || globalCell.x() >= cell_idx_c(sbfs.getNumberOfXCells(level)))
globalCell.x() >= cell_idx_c(sbfs.getNumberOfXCells(level)) || globalCell.z() >= cell_idx_c(sbfs.getNumberOfZCells(level)))
{ {
flagField->addFlag(localCell, noslipFlag); flagField->addFlag(localCell, noslipFlag);
} }
......
...@@ -7,7 +7,7 @@ from pystencils.typing import TypedSymbol ...@@ -7,7 +7,7 @@ from pystencils.typing import TypedSymbol
from lbmpy.advanced_streaming.utility import get_timesteps from lbmpy.advanced_streaming.utility import get_timesteps
from lbmpy.boundaries import NoSlip, UBB from lbmpy.boundaries import NoSlip, UBB
from lbmpy.creationfunctions import create_lb_method, create_lb_collision_rule from lbmpy.creationfunctions import create_lb_method, create_lb_collision_rule
from lbmpy import LBMConfig, LBMOptimisation, Stencil, Method, LBStencil from lbmpy import LBMConfig, LBMOptimisation, Stencil, Method, LBStencil, SubgridScaleModel
from pystencils_walberla import CodeGeneration, generate_info_header from pystencils_walberla import CodeGeneration, generate_info_header
from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator
...@@ -30,20 +30,24 @@ const char * infoCollisionSetup = "{collision_setup}"; ...@@ -30,20 +30,24 @@ const char * infoCollisionSetup = "{collision_setup}";
const bool infoCseGlobal = {cse_global}; const bool infoCseGlobal = {cse_global};
const bool infoCsePdfs = {cse_pdfs}; const bool infoCsePdfs = {cse_pdfs};
""" """
with CodeGeneration() as ctx: with CodeGeneration() as ctx:
field_type = "float64" if ctx.double_accuracy else "float32" field_type = "float64" if ctx.double_accuracy else "float32"
streaming_pattern = 'pull' streaming_pattern = 'esotwist'
timesteps = get_timesteps(streaming_pattern) timesteps = get_timesteps(streaming_pattern)
stencil = LBStencil(Stencil.D3Q19) stencil = LBStencil(Stencil.D3Q19)
method_enum = Method.TRT
fourth_order_correction = 0.01 if method_enum == Method.CUMULANT and stencil.Q == 27 else False
collision_setup = "cumulant-K17" if fourth_order_correction else method_enum.name.lower()
assert stencil.D == 3, "This application supports only three-dimensional stencils" assert stencil.D == 3, "This application supports only three-dimensional stencils"
pdfs, pdfs_tmp = ps.fields(f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {field_type}[3D]", layout='fzyx') pdfs, pdfs_tmp = ps.fields(f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {field_type}[3D]", layout='fzyx')
density_field, velocity_field = ps.fields(f"density, velocity(3) : {field_type}[3D]", layout='fzyx') density_field, velocity_field = ps.fields(f"density, velocity(3) : {field_type}[3D]", layout='fzyx')
macroscopic_fields = {'density': density_field, 'velocity': velocity_field} macroscopic_fields = {'density': density_field, 'velocity': velocity_field}
lbm_config = LBMConfig(stencil=stencil, method=Method.SRT, relaxation_rate=omega, lbm_config = LBMConfig(stencil=stencil, method=method_enum, relaxation_rate=omega, compressible=True,
fourth_order_correction=fourth_order_correction,
streaming_pattern=streaming_pattern) streaming_pattern=streaming_pattern)
lbm_opt = LBMOptimisation(cse_global=False, field_layout='fzyx') lbm_opt = LBMOptimisation(cse_global=False, field_layout='fzyx')
...@@ -66,7 +70,7 @@ with CodeGeneration() as ctx: ...@@ -66,7 +70,7 @@ with CodeGeneration() as ctx:
infoHeaderParams = { infoHeaderParams = {
'stencil': stencil.name.lower(), 'stencil': stencil.name.lower(),
'streaming_pattern': streaming_pattern, 'streaming_pattern': streaming_pattern,
'collision_setup': lbm_config.method.name.lower(), 'collision_setup': collision_setup,
'cse_global': int(lbm_opt.cse_global), 'cse_global': int(lbm_opt.cse_global),
'cse_pdfs': int(lbm_opt.cse_pdfs), 'cse_pdfs': int(lbm_opt.cse_pdfs),
} }
......
import waLBerla as wlb import waLBerla as wlb
from waLBerla.tools.config import block_decomposition
from waLBerla.tools.sqlitedb import sequenceValuesToScalars, checkAndUpdateSchema, storeSingle
import sqlite3
import os
import sys
try:
import machinestate as ms
except ImportError:
ms = None
DB_FILE = os.environ.get('DB_FILE', "gpu_benchmark.sqlite3")
BENCHMARK = int(os.environ.get('BENCHMARK', 0))
WeakX = int(os.environ.get('WeakX', 128))
WeakY = int(os.environ.get('WeakY', 128))
WeakZ = int(os.environ.get('WeakZ', 128))
StrongX = int(os.environ.get('StrongX', 128))
StrongY = int(os.environ.get('StrongY', 128))
StrongZ = int(os.environ.get('StrongZ', 128))
class Scenario: class Scenario:
def __init__(self, domain_size=(64, 64, 64), root_blocks=(2, 2, 2), def __init__(self,
cells_per_block=(32, 32, 32), refinement_depth=0): domain_size=(64, 64, 64),
root_blocks=(2, 2, 2),
num_processes=1,
refinement_depth=0,
cells_per_block=(32, 32, 32),
timesteps=101,
gpu_enabled_mpi=False,
vtk_write_frequency=0,
logger_frequency=30,
blockforest_filestem="blockforest",
write_setup_vtk=True,
async_communication=False,
db_file_name=None):
self.domain_size = domain_size self.domain_size = domain_size
self.root_blocks = root_blocks self.root_blocks = root_blocks
self.cells_per_block = cells_per_block self.cells_per_block = cells_per_block
self.periodic = (0, 0, 1)
self.refinement_depth = refinement_depth self.refinement_depth = refinement_depth
self.num_processes = num_processes
self.bfs_filestem = blockforest_filestem
self.write_setup_vtk = write_setup_vtk
self.timesteps = timesteps
self.gpu_enabled_mpi = gpu_enabled_mpi
self.vtk_write_frequency = vtk_write_frequency
self.logger_frequency = logger_frequency
self.periodic = (0, 0, 0) self.async_communication = async_communication
self.db_file_name = DB_FILE if db_file_name is None else db_file_name
self.config_dict = self.config(print_dict=False) self.config_dict = self.config(print_dict=False)
...@@ -22,39 +66,80 @@ class Scenario: ...@@ -22,39 +66,80 @@ class Scenario:
'domainSize': self.domain_size, 'domainSize': self.domain_size,
'rootBlocks': self.root_blocks, 'rootBlocks': self.root_blocks,
'cellsPerBlock': self.cells_per_block, 'cellsPerBlock': self.cells_per_block,
'periodic': self.periodic 'periodic': self.periodic,
},
'SetupBlockForest': {
'refinementDepth': self.refinement_depth,
'numProcesses': self.num_processes,
'blockForestFilestem': self.bfs_filestem,
'writeVtk': self.write_setup_vtk,
'outputStatistics': True,
'writeSetupForestAndReturn': True,
}, },
'Parameters': { 'Parameters': {
'omega': 1.95, 'omega': 1.95,
'timesteps': 30001, 'timesteps': self.timesteps,
'remainingTimeLoggerFrequency': self.logger_frequency,
'refinementDepth': self.refinement_depth, 'vtkWriteFrequency': self.vtk_write_frequency,
'writeSetupForestAndReturn': False, 'useVTKAMRWriter': True,
'numProcesses': 1, 'oneFilePerProcess': False,
'writeOnlySlice': False,
'cudaEnabledMPI': False, 'gpuEnabledMPI': self.gpu_enabled_mpi,
'benchmarkKernelOnly': False, 'gpuBlockSize': (128, 1, 1),
'asyncCommunication': self.async_communication
'remainingTimeLoggerFrequency': 3,
'vtkWriteFrequency': 10000,
}, },
'Logging': { 'Logging': {
'logLevel': "info", 'logLevel': "info",
} }
} }
if print_dict and config_dict["Parameters"]["writeSetupForestAndReturn"] is False: if print_dict:
wlb.log_info_on_root("Scenario:\n" + pformat(config_dict)) wlb.log_info_on_root("Scenario:\n" + pformat(config_dict))
return config_dict return config_dict
@wlb.member_callback
def results_callback(self, **kwargs):
data = {}
data.update(self.config_dict['Parameters'])
data.update(self.config_dict['DomainSetup'])
data.update(kwargs)
data['executable'] = sys.argv[0]
data['compile_flags'] = wlb.build_info.compiler_flags
data['walberla_version'] = wlb.build_info.version
data['build_machine'] = wlb.build_info.build_machine
if ms:
state = ms.MachineState(extended=False, anonymous=True)
state.generate() # generate subclasses
state.update() # read information
data["MachineState"] = str(state.get())
else:
print("MachineState module is not available. MachineState was not saved")
sequenceValuesToScalars(data)
result = data
sequenceValuesToScalars(result)
num_tries = 4
# check multiple times e.g. may fail when multiple benchmark processes are running
table_name = f"runs"
table_name = table_name.replace("-", "_")
for num_try in range(num_tries):
try:
checkAndUpdateSchema(result, table_name, self.db_file_name)
storeSingle(result, table_name, self.db_file_name)
break
except sqlite3.OperationalError as e:
wlb.log_warning(f"Sqlite DB writing failed: try {num_try + 1}/{num_tries} {str(e)}")
def validation_run(): def validation_run():
"""Run with full periodic shear flow or boundary scenario (ldc) to check if the code works""" """Run with full periodic shear flow or boundary scenario (ldc) to check if the code works"""
wlb.log_info_on_root("Validation run") wlb.log_info_on_root("Validation run")
domain_size = (96, 96, 96) domain_size = (192, 192, 64)
cells_per_block = (32, 32, 32) cells_per_block = (64, 64, 64)
root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)]) root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)])
...@@ -62,8 +147,96 @@ def validation_run(): ...@@ -62,8 +147,96 @@ def validation_run():
scenario = Scenario(domain_size=domain_size, scenario = Scenario(domain_size=domain_size,
root_blocks=root_blocks, root_blocks=root_blocks,
cells_per_block=cells_per_block, cells_per_block=cells_per_block,
refinement_depth=1) timesteps=0,
vtk_write_frequency=0,
refinement_depth=3,
gpu_enabled_mpi=False,
async_communication=False)
scenarios.add(scenario) scenarios.add(scenario)
validation_run() def weak_scaling_ldc(num_proc, gpu_enabled_mpi=False, uniform=True):
wlb.log_info_on_root("Running weak scaling benchmark...")
# This benchmark must run from 16 GPUs onwards
if wlb.mpi.numProcesses() > 1:
num_proc = wlb.mpi.numProcesses()
if uniform:
factor = 3 * num_proc
name = "uniform"
else:
if num_proc % 16 != 0:
raise RuntimeError("Number of processes must be dividable by 16")
factor = int(num_proc // 16)
name = "nonuniform"
cells_per_block = (WeakX, WeakY, WeakZ)
domain_size = (cells_per_block[0] * 3, cells_per_block[1] * 3, cells_per_block[2] * factor)
root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)])
scenarios = wlb.ScenarioManager()
for async_communication in [False, True]:
scenario = Scenario(blockforest_filestem=f"blockforest_{name}_{num_proc}",
domain_size=domain_size,
root_blocks=root_blocks,
num_processes=num_proc,
cells_per_block=cells_per_block,
refinement_depth=0 if uniform else 3,
timesteps=10,
gpu_enabled_mpi=gpu_enabled_mpi,
async_communication=async_communication,
db_file_name=f"weakScalingGPU{name}LDC.sqlite3")
scenarios.add(scenario)
def strong_scaling_ldc(num_proc, gpu_enabled_mpi=False, uniform=True):
wlb.log_info_on_root("Running strong scaling benchmark...")
# This benchmark must run from 64 GPUs onwards
if wlb.mpi.numProcesses() > 1:
num_proc = wlb.mpi.numProcesses()
if num_proc % 64 != 0:
raise RuntimeError("Number of processes must be dividable by 64")
cells_per_block = (StrongX, StrongY, StrongZ)
if uniform:
domain_size = (cells_per_block[0] * 2, cells_per_block[1] * 2, cells_per_block[2] * 16)
name = "uniform"
else:
factor = int(num_proc / 64)
blocks64 = block_decomposition(factor)
cells_per_block = tuple([int(c / b) for c, b in zip(cells_per_block, reversed(blocks64))])
domain_size = (cells_per_block[0] * 3, cells_per_block[1] * 3, cells_per_block[2] * factor)
name = "nonuniform"
root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)])
scenarios = wlb.ScenarioManager()
for async_communication in [False, True]:
scenario = Scenario(blockforest_filestem=f"blockforest_{name}_{num_proc}",
domain_size=domain_size,
root_blocks=root_blocks,
num_processes=num_proc,
cells_per_block=cells_per_block,
refinement_depth=0 if uniform else 3,
timesteps=10,
gpu_enabled_mpi=gpu_enabled_mpi,
async_communication=async_communication,
db_file_name=f"strongScalingGPU{name}LDC.sqlite3")
scenarios.add(scenario)
if BENCHMARK == 0:
validation_run()
elif BENCHMARK == 1:
weak_scaling_ldc(1, True, False)
elif BENCHMARK == 2:
strong_scaling_ldc(1, True, False)
else:
print(f"Invalid benchmark case {BENCHMARK}")