Select Git revision
CodegenJacobiGPU.cpp
Forked from
waLBerla / waLBerla
172 commits behind the upstream repository.

Markus Holzer authored and
Helen Schottenhamml
committed
CodegenJacobiGPU.cpp 7.13 KiB
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file JacobiGpu.cpp
//! \author Martin Bauer <martin.bauer@fau.de>
//
//======================================================================================================================
#include "CudaJacobiKernel2D.h"
#include "CudaJacobiKernel3D.h"
#include "gpu/HostFieldAllocator.h"
#include "blockforest/Initialization.h"
#include "blockforest/communication/UniformDirectScheme.h"
#include "blockforest/communication/UniformBufferedScheme.h"
#include "core/Environment.h"
#include "core/debug/TestSubsystem.h"
#include "gpu/FieldCopy.h"
#include "gpu/GPUField.h"
#include "gpu/AddGPUFieldToStorage.h"
#include "gpu/communication/GPUPackInfo.h"
#include "gpu/FieldIndexing.h"
#include "field/AddToStorage.h"
#include "field/communication/UniformMPIDatatypeInfo.h"
#include "field/vtk/VTKWriter.h"
#include "geometry/initializer/ScalarFieldFromGrayScaleImage.h"
#include "stencil/D2Q9.h"
#include "stencil/D3Q7.h"
#include "timeloop/SweepTimeloop.h"
using namespace walberla;
using ScalarField = GhostLayerField<real_t, 1>;
using GPUField = gpu::GPUField<real_t>;
ScalarField * createField( IBlock* const block, StructuredBlockStorage* const storage )
{
auto xSize = storage->getNumberOfXCells( *block );
auto ySize = storage->getNumberOfYCells( *block );
auto zSize = storage->getNumberOfZCells( *block );
auto numberOfGhostLayers = uint_c(1);
auto initialValue = real_c(0);
auto fieldLayout = field::fzyx;
return new ScalarField (xSize, ySize, zSize,
numberOfGhostLayers, initialValue, fieldLayout,
make_shared< gpu::HostFieldAllocator<real_t> >() // allocator for host pinned memory
);
}
void testJacobi2D()
{
uint_t const xSize = 20;
uint_t const ySize = 20;
// Create blocks
shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid (
uint_t(1) , uint_t(1), uint_t(1), // number of blocks in x,y,z direction
xSize, ySize, uint_t(1), // how many cells per block (x,y,z)
real_c(1.0), // dx: length of one cell in physical coordinates
false, // one block per process - "false" means all blocks to one process
true, true, true ); // no periodicity
BlockDataID const cpuFieldID = blocks->addStructuredBlockData<ScalarField>( &createField, "CPU Field" );
BlockDataID const gpuField = gpu::addGPUFieldToStorage<ScalarField>( blocks, cpuFieldID, "GPU Field Src" );
// Initialize a quarter of the field with ones, the rest remains 0
// Jacobi averages the domain -> every cell should be at 0.25 at sufficiently many timesteps
for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
{
auto f = blockIt->getData<ScalarField>( cpuFieldID );
for( cell_idx_t y = 0; y < cell_idx_c( f->ySize() / 2 ); ++y )
for( cell_idx_t x = 0; x < cell_idx_c( f->xSize() / 2 ); ++x )
f->get( x, y, 0 ) = real_c(1.0);
}
using CommScheme = blockforest::communication::UniformBufferedScheme<stencil::D2Q9>;
using Packing = gpu::communication::GPUPackInfo<GPUField> ;
CommScheme commScheme(blocks);
commScheme.addDataToCommunicate( make_shared<Packing>(gpuField) );
// Create Timeloop
const uint_t numberOfTimesteps = uint_t(800);
SweepTimeloop timeloop ( blocks, numberOfTimesteps );
// Registering the sweep
timeloop.add() << BeforeFunction( commScheme, "Communication" )
<< Sweep( pystencils::CudaJacobiKernel2D(gpuField), "Jacobi Kernel" );
gpu::fieldCpy<GPUField, ScalarField>( blocks, gpuField, cpuFieldID );
timeloop.run();
gpu::fieldCpy<ScalarField, GPUField>( blocks, cpuFieldID, gpuField );
auto firstBlock = blocks->begin();
auto f = firstBlock->getData<ScalarField>( cpuFieldID );
WALBERLA_CHECK_FLOAT_EQUAL(f->get(0,0,0), real_c(1.0 / 4.0))
}
void testJacobi3D()
{
uint_t const xSize = 12;
uint_t const ySize = 12;
uint_t const zSize = 12;
// Create blocks
shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid (
uint_t(1) , uint_t(1), uint_t(1), // number of blocks in x,y,z direction
xSize, ySize, zSize, // how many cells per block (x,y,z)
real_c(1.0), // dx: length of one cell in physical coordinates
false, // one block per process - "false" means all blocks to one process
true, true, true ); // no periodicity
BlockDataID const cpuFieldID = blocks->addStructuredBlockData<ScalarField>( &createField, "CPU Field" );
BlockDataID const gpuField = gpu::addGPUFieldToStorage<ScalarField>( blocks, cpuFieldID, "GPU Field Src" );
// Initialize a quarter of the field with ones, the rest remains 0
// Jacobi averages the domain -> every cell should be at 0.25 at sufficiently many timesteps
for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
{
auto f = blockIt->getData<ScalarField>( cpuFieldID );
for( cell_idx_t z = 0; z < cell_idx_c( f->zSize() / 2 ); ++z )
for( cell_idx_t y = 0; y < cell_idx_c( f->ySize() / 2 ); ++y )
for( cell_idx_t x = 0; x < cell_idx_c( f->xSize() / 2 ); ++x )
f->get( x, y, z ) = real_c(1.0);
}
using CommScheme = blockforest::communication::UniformBufferedScheme<stencil::D3Q7>;
using Packing = gpu::communication::GPUPackInfo<GPUField>;
CommScheme commScheme(blocks);
commScheme.addDataToCommunicate( make_shared<Packing>(gpuField) );
// Create Timeloop
const uint_t numberOfTimesteps = uint_t(800);
SweepTimeloop timeloop ( blocks, numberOfTimesteps );
// Registering the sweep
timeloop.add() << BeforeFunction( commScheme, "Communication" )
<< Sweep( pystencils::CudaJacobiKernel3D(gpuField), "Jacobi Kernel" );
gpu::fieldCpy<GPUField, ScalarField>( blocks, gpuField, cpuFieldID );
timeloop.run();
gpu::fieldCpy<ScalarField, GPUField>( blocks, cpuFieldID, gpuField );
auto firstBlock = blocks->begin();
auto f = firstBlock->getData<ScalarField>( cpuFieldID );
WALBERLA_CHECK_FLOAT_EQUAL(f->get(0,0,0), real_c(1.0 / 8.0))
}
int main( int argc, char ** argv )
{
mpi::Environment const env( argc, argv );
debug::enterTestMode();
testJacobi2D();
testJacobi3D();
return EXIT_SUCCESS;
}