Skip to content
Snippets Groups Projects
Commit 1899cf01 authored by Martin Bauer
Browse files

UniformGridGenerated: support for direct MPI dtype-based comm

parent 2d6fd6f0
No related branches found
No related tags found
No related merge requests found
...@@ -5,6 +5,7 @@ waLBerla_python_file_generates(UniformGridGenerated.py ...@@ -5,6 +5,7 @@ waLBerla_python_file_generates(UniformGridGenerated.py
GenMacroGetter.cpp GenMacroSetter.cpp GenMacroGetter.cpp GenMacroSetter.cpp
GenPackInfo.cpp GenPackInfoAAPush.cpp GenPackInfoAAPull.cpp GenPackInfo.cpp GenPackInfoAAPush.cpp GenPackInfoAAPull.cpp
GenLbKernel.cpp GenLbKernelAAEven.cpp GenLbKernelAAOdd.cpp GenLbKernel.cpp GenLbKernelAAEven.cpp GenLbKernelAAOdd.cpp
GenMpiDtypeInfo.h GenMpiDtypeInfoAAPull.h GenMpiDtypeInfoAAPush.h
GenDefines.h) GenDefines.h)
......
DomainSetup DomainSetup
{ {
blocks < 1, 1, 1 >; blocks < 1, 1, 1 >;
cellsPerBlock < 256, 128, 128 >; cellsPerBlock < 64, 64, 64 >;
periodic < 1, 1, 1 >; periodic < 1, 1, 1 >;
} }
Parameters Parameters
{ {
timesteps 400; // time steps of one performance measurement timesteps 3000; // time steps of one performance measurement
warmupSteps 1; // number of steps to run before measurement starts warmupSteps 1; // number of steps to run before measurement starts
outerIterations 1; // how many measurements to conduct outerIterations 1; // how many measurements to conduct
vtkWriteFrequency 0; // write a VTK file every n'th step, if zero VTK output is disabled vtkWriteFrequency 100; // write a VTK file every n'th step, if zero VTK output is disabled
cudaEnabledMPI false; // switch on if you have a CUDA-enabled MPI implementation cudaEnabledMPI false; // switch on if you have a CUDA-enabled MPI implementation
timeStepMode aaKernelOnly; // can be: twoField, twoFieldKernelOnly, aa, aaKernelOnly timeStepMode aa; // can be: twoField, twoFieldKernelOnly, aa, aaKernelOnly
remainingTimeLoggerFrequency 0; // interval in seconds to log the estimated remaining time remainingTimeLoggerFrequency 0; // interval in seconds to log the estimated remaining time
directComm 1;
omega 1.8; omega 1.8;
useGui 0; useGui 0;
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include "field/vtk/VTKWriter.h" #include "field/vtk/VTKWriter.h"
#include "field/AddToStorage.h" #include "field/AddToStorage.h"
#include "blockforest/communication/UniformBufferedScheme.h" #include "blockforest/communication/UniformBufferedScheme.h"
#include "blockforest/communication/UniformDirectScheme.h"
#include "timeloop/all.h" #include "timeloop/all.h"
#include "core/timing/TimingPool.h" #include "core/timing/TimingPool.h"
#include "core/timing/RemainingTimeLogger.h" #include "core/timing/RemainingTimeLogger.h"
...@@ -25,6 +26,9 @@ ...@@ -25,6 +26,9 @@
#include "GenPackInfo.h" #include "GenPackInfo.h"
#include "GenPackInfoAAPush.h" #include "GenPackInfoAAPush.h"
#include "GenPackInfoAAPull.h" #include "GenPackInfoAAPull.h"
#include "GenMpiDtypeInfo.h"
#include "GenMpiDtypeInfoAAPull.h"
#include "GenMpiDtypeInfoAAPush.h"
using namespace walberla; using namespace walberla;
...@@ -52,7 +56,7 @@ int main( int argc, char **argv ) ...@@ -52,7 +56,7 @@ int main( int argc, char **argv )
const real_t omega = parameters.getParameter<real_t>( "omega", real_c( 1.4 )); const real_t omega = parameters.getParameter<real_t>( "omega", real_c( 1.4 ));
uint_t timesteps = parameters.getParameter<uint_t>( "timesteps", uint_c( 60 )); uint_t timesteps = parameters.getParameter<uint_t>( "timesteps", uint_c( 60 ));
const real_t shearVelocityMagnitude = parameters.getParameter<real_t>("shearVelocityMagnitude", 0.08); const real_t shearVelocityMagnitude = parameters.getParameter<real_t>("shearVelocityMagnitude", 0.08);
const bool directComm = parameters.getParameter<bool>("directComm", false);
auto pdfFieldAdder = [](IBlock* const block, StructuredBlockStorage * const storage) { auto pdfFieldAdder = [](IBlock* const block, StructuredBlockStorage * const storage) {
return new PdfField_T(storage->getNumberOfXCells(*block), return new PdfField_T(storage->getNumberOfXCells(*block),
...@@ -74,6 +78,7 @@ int main( int argc, char **argv ) ...@@ -74,6 +78,7 @@ int main( int argc, char **argv )
for( auto & b : *blocks) for( auto & b : *blocks)
setterKernel(&b); setterKernel(&b);
// Buffered Comm
blockforest::communication::UniformBufferedScheme< Stencil_T > twoFieldComm(blocks ); blockforest::communication::UniformBufferedScheme< Stencil_T > twoFieldComm(blocks );
twoFieldComm.addPackInfo(make_shared< pystencils::GenPackInfo >(pdfFieldId ) ); twoFieldComm.addPackInfo(make_shared< pystencils::GenPackInfo >(pdfFieldId ) );
...@@ -83,12 +88,23 @@ int main( int argc, char **argv ) ...@@ -83,12 +88,23 @@ int main( int argc, char **argv )
blockforest::communication::UniformBufferedScheme< Stencil_T > aaPushComm(blocks); blockforest::communication::UniformBufferedScheme< Stencil_T > aaPushComm(blocks);
aaPushComm.addPackInfo(make_shared< pystencils::GenPackInfoAAPush>(pdfFieldId)); aaPushComm.addPackInfo(make_shared< pystencils::GenPackInfoAAPush>(pdfFieldId));
// Direct Comm
blockforest::communication::UniformDirectScheme< Stencil_T > twoFieldCommDirect(blocks);
twoFieldCommDirect.addDataToCommunicate(make_shared<pystencils::GenMpiDtypeInfo>(pdfFieldId));
blockforest::communication::UniformDirectScheme< Stencil_T > aaPullCommDirect(blocks);
aaPullCommDirect.addDataToCommunicate(make_shared<pystencils::GenMpiDtypeInfoAAPull>(pdfFieldId));
blockforest::communication::UniformDirectScheme< Stencil_T > aaPushCommDirect(blocks);
aaPushCommDirect.addDataToCommunicate(make_shared<pystencils::GenMpiDtypeInfoAAPush>(pdfFieldId));
using F = std::function<void()>;
SweepTimeloop timeLoop( blocks->getBlockStorage(), timesteps / 2 ); SweepTimeloop timeLoop( blocks->getBlockStorage(), timesteps / 2 );
if( timeStepMode == "twoField") if( timeStepMode == "twoField")
{ {
timeLoop.add() << BeforeFunction(twoFieldComm, "communication" ) timeLoop.add() << BeforeFunction(directComm ? F(twoFieldCommDirect) : F(twoFieldComm), "communication" )
<< Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide1" ); << Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide1" );
timeLoop.add() << BeforeFunction(twoFieldComm, "communication" ) timeLoop.add() << BeforeFunction(directComm ? F(twoFieldCommDirect) : F(twoFieldComm), "communication" )
<< Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide2" ); << Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide2" );
} else if ( timeStepMode == "twoFieldKernelOnly") { } else if ( timeStepMode == "twoFieldKernelOnly") {
...@@ -96,9 +112,9 @@ int main( int argc, char **argv ) ...@@ -96,9 +112,9 @@ int main( int argc, char **argv )
timeLoop.add() << Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide2" ); timeLoop.add() << Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide2" );
} else if ( timeStepMode == "aa") { } else if ( timeStepMode == "aa") {
timeLoop.add() << Sweep( pystencils::GenLbKernelAAEven(pdfFieldId, omega), "AA Even" ); timeLoop.add() << Sweep( pystencils::GenLbKernelAAEven(pdfFieldId, omega), "AA Even" );
timeLoop.add() << BeforeFunction( aaPullComm ) timeLoop.add() << BeforeFunction( directComm ? F(aaPullCommDirect) : F(aaPullComm) )
<< Sweep( pystencils::GenLbKernelAAOdd(pdfFieldId, omega), "AA Odd") << Sweep( pystencils::GenLbKernelAAOdd(pdfFieldId, omega), "AA Odd")
<< AfterFunction( aaPushComm ); << AfterFunction( directComm ? F(aaPushCommDirect) : F(aaPushComm) );
} else if ( timeStepMode == "aaKernelOnly") { } else if ( timeStepMode == "aaKernelOnly") {
timeLoop.add() << Sweep( pystencils::GenLbKernelAAEven(pdfFieldId, omega), "AA Even" ); timeLoop.add() << Sweep( pystencils::GenLbKernelAAEven(pdfFieldId, omega), "AA Even" );
timeLoop.add() << Sweep( pystencils::GenLbKernelAAOdd(pdfFieldId, omega), "AA Odd"); timeLoop.add() << Sweep( pystencils::GenLbKernelAAOdd(pdfFieldId, omega), "AA Odd");
......
import sympy as sp import sympy as sp
import pystencils as ps import pystencils as ps
from lbmpy.creationfunctions import create_lb_update_rule from lbmpy.creationfunctions import create_lb_update_rule
from pystencils_walberla import CodeGeneration, generate_pack_info_from_kernel, generate_sweep from pystencils_walberla import CodeGeneration, generate_pack_info_from_kernel, generate_sweep, generate_mpidtype_info_from_kernel
from lbmpy.macroscopic_value_kernels import macroscopic_values_getter, macroscopic_values_setter from lbmpy.macroscopic_value_kernels import macroscopic_values_getter, macroscopic_values_setter
from lbmpy.fieldaccess import AAEvenTimeStepAccessor, AAOddTimeStepAccessor from lbmpy.fieldaccess import AAEvenTimeStepAccessor, AAOddTimeStepAccessor
...@@ -135,6 +135,10 @@ with CodeGeneration() as ctx: ...@@ -135,6 +135,10 @@ with CodeGeneration() as ctx:
generate_pack_info_from_kernel(ctx, 'GenPackInfoAAPush', update_rule_aa_odd, kind='push', generate_pack_info_from_kernel(ctx, 'GenPackInfoAAPush', update_rule_aa_odd, kind='push',
cpu_vectorize_info={'instruction_set': None}) cpu_vectorize_info={'instruction_set': None})
generate_mpidtype_info_from_kernel(ctx, 'GenMpiDtypeInfo', update_rule_two_field)
generate_mpidtype_info_from_kernel(ctx, 'GenMpiDtypeInfoAAPull', update_rule_aa_odd, kind='pull')
generate_mpidtype_info_from_kernel(ctx, 'GenMpiDtypeInfoAAPush', update_rule_aa_odd, kind='push')
# Info Header # Info Header
infoHeaderParams = { infoHeaderParams = {
'stencil': stencil_str, 'stencil': stencil_str,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment