Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision
  • 121-buffersystem-receiver-info-without-sender-ranks
  • 3-stable
  • 4-stable
  • AddaptTypeSystem
  • CMakeCodeGenPartTwo
  • ChannelFlow
  • CoVortex
  • CodegenForRefinement
  • CommunicationGPUBenchmark
  • ComnbinedGPUPackinfo
  • ExportCudaDeviceSelection
  • FixSinglePrecisionProblems
  • FlagFieldExample
  • FlowAroundSphere
  • FreeSurface
  • GPURefineTest
  • GPURefinement
  • GPURefinementImprovement
  • HRR
  • HydroPressure
  • IBC
  • InterpolationBC
  • Italy
  • LDC
  • Lagoon
  • LeesEdwards
  • ListLBM
  • NewChannelBenchmark
  • Remove_fSize_from_templates
  • SphereMovie
  • TGA
  • TaylorBubble
  • TurbulentChannel
  • UpgradePystencils
  • VTKUnstructured
  • clang11
  • develop
  • develop2
  • fluidizedbed_showcase
  • master
  • phaseField
  • phasefield-drop
  • porous
  • porousHeat
  • remiPorous
  • s2a
  • setup_walberla_codegen
  • vbondmodel_integrated
  • vbondmodel_isotropic
  • v3.1
  • v3.2
  • v3.3
  • v4.0dev
  • v4.1
  • v4.2
  • v5.0dev
56 results

Target

Select target project
  • castellsc/walberla
  • ravi.k.ayyala/walberla
  • em73etav/walberla
  • hoenig/walberla
  • le45zyci/walberla
  • sudesh.rathnayake/walberla
  • el38efib/walberla
  • rahil.doshi/walberla
  • Bindgen/walberla
  • ArashPartow/walberla
  • jarmatz/walberla
  • ec93ujoh/walberla
  • walberla/walberla
  • ProjectPhysX/walberla
  • ob28imeq/walberla
  • shellshocked2003/walberla
  • stewart/walberla
  • jbadwaik/walberla
  • behzad.safaei/walberla
  • schruff/walberla
  • loreson/walberla
  • Novermars/walberla
  • itischler/walberla
  • holzer/walberla
  • da15siwa/walberla
  • he66coqe/walberla
  • jngrad/walberla
  • uq60ifih/walberla
  • ostanin/walberla
  • bauer/walberla
  • zy79zopo/walberla
  • jonas_schmitt/walberla
  • po60nani/walberla
  • ro36vugi/walberla
  • fweik/walberla
  • ab04unyc/walberla
  • yw25ynew/walberla
  • ig38otak/walberla
  • RudolfWeeber/walberla
39 results
Select Git revision
  • 121-buffersystem-receiver-info-without-sender-ranks
  • 128-some-tests-are-not-active
  • 146-cuda-gcc-config-warning
  • 3-stable
  • 4-stable
  • 5-stable
  • 6-stable
  • 7-stable
  • 727-refactor-sqlExport
  • AtomicAdd_for_CUDA_compute_capabilities<6.0
  • ChargedParticles
  • CodegenForRefinement
  • GeneratedOutflowBC
  • RayleighBernardConvection
  • Remove_fSize_from_templates
  • UpdateGPUBenchmark
  • UpdatePhaseField
  • alt/doxygen_release_note
  • angersbach/coding-day-01-09
  • antidunes-visualization
  • bam_piping_erosion
  • benchmark_sqlite_modify
  • change-default-layout-fzyx
  • clang-tidy
  • clang11
  • clang_tidy2
  • cmake_cleanup
  • cnt_app
  • codegen-update
  • coding-day-01-09-mesh
  • coupling_tutorial
  • doshi/coding-day-01-09
  • externalize_dependencies
  • fix_nvcc_compiler_warnings
  • fluidizedbed_showcase
  • hip-ShiftedPeriodicity
  • kajol/coding-day
  • kemmler/particle_coupling_GPU
  • lbmpy-kernel-comparison
  • master
  • mr_refactor_wfb
  • plewinski/fix-Guo-force-model-TRT-MRT
  • pystencils2.0-adoption
  • rangersbach/doxygen_style
  • ravi/coding-day
  • ravi/material_transport
  • setup_walberla_codegen
  • suction_bucket
  • suffa/NorthWind
  • suffa/NorthWind_refined
  • suffa/SYCL
  • suffa/Sparse
  • suffa/compact_interpolation
  • suffa/fix_blockwise_local_communication
  • suffa/fix_force_on_boundary
  • suffa/integrate_moving_geo
  • suffa/psm_lbm_package
  • thermalFreeSurfaceLBM
  • thoennes/cusotm-mpi-reduce-function
  • use-correct-codegen-data-type
  • viscLDCwithFSLBM
  • v3.1
  • v3.2
  • v3.3
  • v4.0dev
  • v4.1
  • v4.2
  • v5.0dev
  • v5.1
  • v6.0dev
  • v6.1
  • v7.0dev
  • v7.1
73 results
Show changes
Commits on Source (2)
...@@ -32,5 +32,9 @@ if ( WALBERLA_BUILD_WITH_PYTHON ) ...@@ -32,5 +32,9 @@ if ( WALBERLA_BUILD_WITH_PYTHON )
endif() endif()
# The FlagFieldGPU example is only added when both code generation and CUDA are enabled.
if ( WALBERLA_BUILD_WITH_CODEGEN AND WALBERLA_BUILD_WITH_CUDA )
add_subdirectory( FlagFieldGPU )
endif()
# Link the *.prm parameter files into the build directory.
waLBerla_link_files_to_builddir( "*.prm" )
# All generated sources listed below are CUDA files (*.cu), so the targets are defined for CUDA builds only.
if (WALBERLA_BUILD_WITH_CUDA)
# Generated target: FlagFieldGPUCodeGen.py produces the LB sweep, the macroscopic setter,
# the UBB / NoSlip / Outflow boundaries, the pack info and the info header.
# OUT_FILES has to list every file the script emits.
waLBerla_generate_target_from_python(NAME FlagFieldGPUGenerated
FILE FlagFieldGPUCodeGen.py
OUT_FILES FlagFieldGPU_LbSweep.cu FlagFieldGPU_LbSweep.h
FlagFieldGPU_MacroSetter.cu FlagFieldGPU_MacroSetter.h
FlagFieldGPU_UBB.cu FlagFieldGPU_UBB.h
FlagFieldGPU_NoSlip.cu FlagFieldGPU_NoSlip.h
FlagFieldGPU_Outflow.cu FlagFieldGPU_Outflow.h
FlagFieldGPU_PackInfo.cu FlagFieldGPU_PackInfo.h
FlagFieldGPU_InfoHeader.h)
# The application itself; it depends on the generated target above.
# NOTE(review): FlagFieldGPU.cpp includes cuda/ headers, but `cuda` is not listed in DEPENDS --
# confirm it is pulled in transitively (e.g. through FlagFieldGPUGenerated).
waLBerla_add_executable( NAME FlagFieldGPU FILE FlagFieldGPU.cpp
DEPENDS blockforest boundary core domain_decomposition field geometry timeloop vtk FlagFieldGPUGenerated)
endif()
\ No newline at end of file
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file FlagFieldGPU.cpp
//! \author Markus Holzer <markus.holzer@fau.de>
//
//======================================================================================================================
#include "blockforest/all.h"
#include "core/all.h"
#include "domain_decomposition/all.h"
#include "field/all.h"
#include "geometry/all.h"
#include "timeloop/all.h"
#if defined(WALBERLA_BUILD_WITH_CUDA)
# include "cuda/AddGPUFieldToStorage.h"
# include "cuda/DeviceSelectMPI.h"
# include "cuda/HostFieldAllocator.h"
# include "cuda/NVTX.h"
# include "cuda/ParallelStreams.h"
# include "cuda/communication/GPUPackInfo.h"
# include "cuda/communication/UniformGPUScheme.h"
#endif
// CodeGen includes
#include "FlagFieldGPU_InfoHeader.h"
namespace walberla
{
// Type aliases used by the application. PdfField_T, VelocityField_T, ScalarField_T and Stencil_T
// are not declared here; they come from the generated FlagFieldGPU_InfoHeader.h.
using PackInfo_T   = lbm::FlagFieldGPU_PackInfo;
using flag_t       = walberla::uint8_t;
using FlagField_T  = FlagField< flag_t >;
using GPUField     = cuda::GPUField< real_t >;
using GPUField_int = cuda::GPUField< uint8_t >;

// Block data creation callback for the CPU PDF field.
// Sizes the field to the block's cell counts, uses fzyx layout and a 64-byte aligned allocator.
// The fourth constructor argument (uint_t(1)) is presumably the number of ghost layers -- TODO confirm.
// The raw pointer is returned to the caller (the block storage this callback is registered with).
auto pdfFieldAdder = [](IBlock* const block, StructuredBlockStorage* const storage) {
   const auto xCells = storage->getNumberOfXCells(*block);
   const auto yCells = storage->getNumberOfYCells(*block);
   const auto zCells = storage->getNumberOfZCells(*block);

   auto allocator = make_shared< field::AllocateAligned< real_t, 64 > >();

   return new PdfField_T(xCells, yCells, zCells, uint_t(1), field::fzyx, allocator);
};
// Application entry point (inside namespace walberla).
//
// Builds a uniform block grid from the configuration, creates CPU fields and their GPU counterparts,
// sets up communication, the UBB / NoSlip / Outflow boundaries and the generated LB sweep, runs the
// time loop and finally logs cell counts and performance figures on the root process.
//
// argc / argv are forwarded to walberla::Environment.
// Returns EXIT_SUCCESS; there is no other return path in this function.
int main(int argc, char** argv)
{
walberla::Environment walberlaEnv(argc, argv);
cuda::selectDeviceBasedOnMpiRank();
auto config = walberlaEnv.config();
auto blocks = blockforest::createUniformBlockGridFromConfig(config);
// read parameters from the "Parameters" block; the second argument of getParameter is the default value
auto parameters = config->getOneBlock("Parameters");
const uint_t timesteps = parameters.getParameter< uint_t >("timesteps", uint_c(10));
const real_t omega = parameters.getParameter< real_t >("omega", real_t(1.9));
// u_max, reynolds_number and diameter_sphere are only used in the log message before the run;
// they are not passed to any sweep or boundary in this function.
const real_t u_max = parameters.getParameter< real_t >("u_max", real_t(0.05));
const real_t reynolds_number = parameters.getParameter< real_t >("reynolds_number", real_t(1000));
const uint_t diameter_sphere = parameters.getParameter< uint_t >("diameter_sphere", uint_t(5));
const real_t remainingTimeLoggerFrequency =
parameters.getParameter< real_t >("remainingTimeLoggerFrequency", 3.0); // in seconds
// create CPU fields (PDFs, velocity, density) ...
BlockDataID pdfFieldID = blocks->addStructuredBlockData< PdfField_T >(pdfFieldAdder, "PDFs");
BlockDataID velFieldID = field::addToStorage< VelocityField_T >(blocks, "velocity", real_t(0), field::fzyx);
BlockDataID densityFieldID = field::addToStorage< ScalarField_T >(blocks, "density", real_t(0), field::fzyx);
// ... and a GPU field for each of them, created from the corresponding CPU field
BlockDataID pdfFieldIDGPU = cuda::addGPUFieldToStorage< PdfField_T >(blocks, pdfFieldID, "PDFs on GPU", true);
BlockDataID velFieldIDGPU =
cuda::addGPUFieldToStorage< VelocityField_T >(blocks, velFieldID, "velocity on GPU", true);
BlockDataID densityFieldIDGPU =
cuda::addGPUFieldToStorage< ScalarField_T >(blocks, densityFieldID, "density on GPU", true);
// flag field on the CPU plus its GPU counterpart
BlockDataID flagFieldId = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field");
BlockDataID flagFieldId_gpu = cuda::addGPUFieldToStorage< FlagField_T >(blocks, flagFieldId, "flag on GPU", true);
// initialise all PDFs on the GPU from the GPU velocity field
pystencils::FlagFieldGPU_MacroSetter setterSweep(pdfFieldIDGPU, velFieldIDGPU);
for (auto& block : *blocks)
setterSweep(&block);
// Copy the initialised PDFs into the CPU field (argument order appears to be destination, source).
// Presumably needed because the outflow boundary below is constructed with the CPU PDF field -- TODO confirm.
cuda::fieldCpy< PdfField_T, GPUField >(blocks, pdfFieldID, pdfFieldIDGPU);
// Create communication
cuda::communication::UniformGPUScheme< Stencil_T > communication(blocks, false);
communication.addPackInfo(make_shared< PackInfo_T >(pdfFieldIDGPU));
// wrapped in a std::function so it can be registered as a "before" function in the time loop
auto comm = std::function< void() >([&]() { communication.communicate(nullptr); });
// create and initialize boundary handling
const FlagUID fluidFlagUID("Fluid");
auto boundariesConfig = config->getOneBlock("Boundaries");
lbm::FlagFieldGPU_UBB ubb(blocks, pdfFieldIDGPU);
lbm::FlagFieldGPU_NoSlip noSlip(blocks, pdfFieldIDGPU);
lbm::FlagFieldGPU_Outflow outflow(blocks, pdfFieldIDGPU, pdfFieldID);
// set the boundary flags from the "Boundaries" config block, then mark every remaining cell as fluid
geometry::initBoundaryHandling< FlagField_T >(*blocks, flagFieldId, boundariesConfig);
geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldId, fluidFlagUID);
// let each generated boundary object build its data from the CPU flag field
ubb.fillFromFlagField< FlagField_T >(blocks, flagFieldId, FlagUID("UBB"), fluidFlagUID);
noSlip.fillFromFlagField< FlagField_T >(blocks, flagFieldId, FlagUID("NoSlip"), fluidFlagUID);
outflow.fillFromFlagField< FlagField_T >(blocks, flagFieldId, FlagUID("Outflow"), fluidFlagUID);
// transfer the finished flag field to the GPU; the LB sweep takes the GPU flag field as an argument
cuda::fieldCpy< GPUField_int, FlagField_T >(blocks, flagFieldId_gpu, flagFieldId);
pystencils::FlagFieldGPU_LbSweep lbSweep(densityFieldIDGPU, flagFieldId_gpu, pdfFieldIDGPU, velFieldIDGPU, omega);
// create time loop
SweepTimeloop timeloop(blocks->getBlockStorage(), timesteps);
// add LBM sweep and communication to time loop
// registration order: communication -> outflow -> UBB -> NoSlip -> LB update rule
timeloop.add() << BeforeFunction(comm, "communication") << Sweep(outflow, "outflow boundary");
timeloop.add() << Sweep(ubb, "ubb boundary");
timeloop.add() << Sweep(noSlip, "noSlip boundary");
timeloop.add() << Sweep(lbSweep, "LB update rule");
// LBM stability check
// NOTE(review): the checker is handed the CPU PDF field (pdfFieldID). The only GPU -> CPU copy of the
// PDFs in this function happens once before the time loop -- confirm the check sees current data.
timeloop.addFuncAfterTimeStep(makeSharedFunctor(field::makeStabilityChecker< PdfField_T, FlagField_T >(
config, blocks, pdfFieldID, flagFieldId, fluidFlagUID)),
"LBM stability check");
// log remaining time
timeloop.addFuncAfterTimeStep(timing::RemainingTimeLogger(timeloop.getNrOfTimeSteps(), remainingTimeLoggerFrequency),
"remaining time logger");
// add VTK output to time loop (disabled when vtkWriteFrequency is 0, which is the default)
uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0);
if (vtkWriteFrequency > 0)
{
auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out",
"simulation_step", false, true, true, false, 0);
// the writers below read the CPU fields, so refresh velocity and density from the GPU before writing
vtkOutput->addBeforeFunction([&]() {
cuda::fieldCpy< VelocityField_T, GPUField >(blocks, velFieldID, velFieldIDGPU);
cuda::fieldCpy< ScalarField_T, GPUField >(blocks, densityFieldID, densityFieldIDGPU);
});
auto velWriter = make_shared< field::VTKWriter< VelocityField_T > >(velFieldID, "velocity");
auto densityWriter = make_shared< field::VTKWriter< ScalarField_T > >(densityFieldID, "density");
auto flagWriter = make_shared< field::VTKWriter< FlagField_T > >(flagFieldId, "flagField");
vtkOutput->addCellDataWriter(velWriter);
vtkOutput->addCellDataWriter(densityWriter);
vtkOutput->addCellDataWriter(flagWriter);
timeloop.addFuncAfterTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
}
// time the complete run of the time loop
WcTimer simTimer;
WALBERLA_LOG_INFO_ON_ROOT("Simulating flow around sphere:"
"\n timesteps: "
<< timesteps << "\n reynolds number: " << reynolds_number
<< "\n relaxation rate: " << omega << "\n maximum inflow velocity: " << u_max
<< "\n diameter_sphere: " << diameter_sphere)
simTimer.start();
timeloop.run();
simTimer.end();
WALBERLA_LOG_INFO_ON_ROOT("Simulation finished")
auto time = simTimer.last();
// count fluid cells and non-fluid cells, summed over all blocks iterated here
uint64_t nrOfFluidCells = 0;
uint64_t nrOfBoundaryCells = 0;
for (auto& block : *blocks)
{
auto* flagField = block.getData< FlagField_T >(flagFieldId);
auto domainFlag = flagField->getFlag(fluidFlagUID);
for (auto it = flagField->begin(); it != flagField->end(); ++it)
{
// every cell without the fluid flag is counted as a boundary cell
if (isFlagSet(it, domainFlag)) { nrOfFluidCells += 1; }
if (!isFlagSet(it, domainFlag)) { nrOfBoundaryCells += 1; }
}
}
// million lattice (fluid) cell updates per second: fluid cells * time steps / measured time * 1e-6
auto mlupsPerProcess = real_c(nrOfFluidCells) * real_c(timesteps) / time * 1e-6;
// TODO: when going to multiple GPUs the performance should be measured on each GPU. At the moment only performance
// on root is considered.
WALBERLA_LOG_RESULT_ON_ROOT("Fluid Cells on the block " << nrOfFluidCells)
WALBERLA_LOG_RESULT_ON_ROOT("Boundary Cells on the block " << nrOfBoundaryCells)
WALBERLA_LOG_RESULT_ON_ROOT("MLUPS per process " << mlupsPerProcess)
WALBERLA_LOG_RESULT_ON_ROOT("Time per time step " << time / real_c(timesteps))
return EXIT_SUCCESS;
}
} // namespace walberla
// Program entry point: delegates to walberla::main and propagates its result as the process exit code.
// Without the explicit `return`, flowing off the end of main() yields 0 regardless of what
// walberla::main returned.
int main(int argc, char** argv) { return walberla::main(argc, argv); }
from pystencils.field import fields
from pystencils.astnodes import Block, Conditional
from lbmpy.macroscopic_value_kernels import macroscopic_values_setter
from lbmpy.stencils import get_stencil
from lbmpy.creationfunctions import create_lb_update_rule, create_lb_method
from lbmpy.boundaries import NoSlip, UBB, ExtrapolationOutflow
from pystencils_walberla import CodeGeneration, generate_sweep, generate_info_header
from lbmpy_walberla.additional_data_handler import UBBAdditionalDataHandler, OutflowAdditionalDataHandler
from lbmpy_walberla import generate_boundary, generate_lb_pack_info
import sympy as sp
with CodeGeneration() as ctx:
    # Floating point type of the generated fields follows the accuracy setting of the build.
    dtype = 'float64' if ctx.double_accuracy else 'float32'

    stencil = get_stencil("D3Q19")
    q = len(stencil)
    dim = len(stencil[0])

    # Symbolic fields the kernels operate on; all use the fzyx layout.
    pdfs, pdfs_tmp = fields(f"pdfs({q}), pdfs_tmp({q}) : {dtype}[{dim}D]", layout='fzyx')
    velocity_field, density_field = fields(f"velocity({dim}), density(1) : {dtype}[{dim}D]", layout='fzyx')
    flag = fields(f"flag_field: uint8[{dim}D]", layout='fzyx')

    # The relaxation rate stays symbolic so it becomes a parameter of the generated sweep.
    omega = sp.Symbol("omega")

    # Macroscopic quantities the LB kernel writes out in addition to the PDFs.
    output = {
        'density': density_field,
        'velocity': velocity_field
    }

    method = create_lb_method(stencil=stencil, method='srt', relaxation_rate=omega, compressible=True)

    update_rule = create_lb_update_rule(lb_method=method,
                                        output=output,
                                        optimization={"symbolic_field": pdfs,
                                                      "symbolic_temporary_field": pdfs_tmp,
                                                      "double_precision": bool(ctx.double_accuracy)},
                                        kernel_type='stream_pull_collide')

    # Execute the update only in cells whose flag value equals 8.
    # NOTE(review): 8 is presumably the value of the fluid flag in the application's flag field,
    # which depends on the order in which flags are registered there -- confirm against the C++ app.
    update_rule = [Conditional(sp.Eq(flag.center(), 8), Block(update_rule))]

    # getter & setter
    setter_assignments = macroscopic_values_setter(method, velocity=velocity_field.center_vector,
                                                   pdfs=pdfs, density=1.0)

    # Type definitions written to the info header for use by the C++ application.
    stencil_typedefs = {'Stencil_T': stencil}
    field_typedefs = {'PdfField_T': pdfs,
                      'VelocityField_T': velocity_field,
                      'ScalarField_T': density_field}

    target = 'gpu'

    # sweeps
    generate_sweep(ctx, 'FlagFieldGPU_LbSweep', update_rule,
                   field_swaps=[(pdfs, pdfs_tmp)], target=target)
    generate_sweep(ctx, 'FlagFieldGPU_MacroSetter', setter_assignments, target=target)

    # boundaries
    # The inflow velocity is fixed at generation time and is not a runtime parameter.
    ubb = UBB((0.05, 0, 0))
    outflow = ExtrapolationOutflow(stencil[4], method)
    outflow_data_handler = OutflowAdditionalDataHandler(stencil, outflow, target=target)

    generate_boundary(ctx, 'FlagFieldGPU_UBB', ubb, method, target=target)
    generate_boundary(ctx, 'FlagFieldGPU_Outflow', outflow, method,
                      target=target, additional_data_handler=outflow_data_handler)
    generate_boundary(ctx, 'FlagFieldGPU_NoSlip', NoSlip(), method, target=target, streaming_pattern='pull')

    # communication
    generate_lb_pack_info(ctx, 'FlagFieldGPU_PackInfo', stencil, pdfs, target=target)

    # Info header containing correct template definitions for stencil and field
    generate_info_header(ctx, 'FlagFieldGPU_InfoHeader',
                         stencil_typedefs=stencil_typedefs, field_typedefs=field_typedefs)
Parameters
{
omega 1.8;
timesteps 1001;
u_max 0.05;
vtkWriteFrequency 250;
reynolds_number 100;
diameter_sphere 32;
}
DomainSetup
{
blocks < 1, 1, 1 >;
cellsPerBlock < 128, 64, 64 >;
periodic < 0, 0, 0 >;
}
Boundaries
{
Border { direction W; walldistance -1; flag UBB; }
Border { direction E; walldistance -1; flag Outflow; }
Border { direction S; walldistance -1; flag NoSlip; }
Border { direction N; walldistance -1; flag NoSlip; }
Border { direction T; walldistance -1; flag NoSlip; }
Border { direction B; walldistance -1; flag NoSlip; }
Body
{
shape Sphere;
midpoint <64, 32, 32>;
radius 16;
flag NoSlip;
}
}