diff --git a/tests/BasicLbmScenarios/SimDomain.hpp b/tests/BasicLbmScenarios/SimDomain.hpp index 1777c5583c0cccccf959bab638a85dd0ee1c8455..192d17e8a05a679255cf1141ea494a6854131303 100644 --- a/tests/BasicLbmScenarios/SimDomain.hpp +++ b/tests/BasicLbmScenarios/SimDomain.hpp @@ -11,6 +11,17 @@ #include "gen/LbmAlgorithms.hpp" +#if defined(LBM_SCENARIOS_GPU_BUILD) +# include "gpu/AddGPUFieldToStorage.h" +# include "gpu/FieldCopy.h" +# include "gpu/GPUField.h" +# include "gpu/GPUWrapper.h" +# include "gpu/HostFieldAllocator.h" +# include "gpu/communication/MemcpyPackInfo.h" +#include "gpu/communication/GPUPackInfo.h" +# include "gpu/communication/UniformGPUScheme.h" +#endif + namespace BasicLbmScenarios { @@ -20,8 +31,17 @@ using PdfField_T = field::GhostLayerField< real_t, gen::LbStencil::Q >; using ScalarField_T = field::GhostLayerField< real_t, 1 >; using VectorField_T = field::GhostLayerField< real_t, gen::LbStencil::D >; using FlagField_T = FlagField< uint8_t >; -using CommScheme = blockforest::communication::UniformBufferedScheme< gen::LbStencil >; -using PdfsPackInfo = field::communication::StencilRestrictedPackInfo< PdfField_T, gen::LbStencil >; + +using CpuCommScheme = blockforest::communication::UniformBufferedScheme< gen::LbStencil >; +using CpuPdfsPackInfo = field::communication::StencilRestrictedPackInfo< PdfField_T, gen::LbStencil >; + +#if defined(LBM_SCENARIOS_GPU_BUILD) +using CommonGpuField = gpu::GPUField< PdfField_T::value_type >; + +using GpuCommScheme = gpu::communication::UniformGPUScheme< gen::LbStencil >; +// using GpuPdfsPackInfo = gpu::communication::MemcpyPackInfo< CommonGpuField >; +using GpuPdfsPackInfo = gpu::communication::GPUPackInfo< CommonGpuField >; +#endif struct SimDomain { @@ -35,7 +55,7 @@ struct SimDomain const BlockDataID flagFieldId; } cpuFields; - CommScheme comm; + CpuCommScheme commCpu; #if defined(LBM_SCENARIOS_GPU_BUILD) struct @@ -45,6 +65,8 @@ struct SimDomain const BlockDataID uId; } gpuFields; + // GpuCommScheme commGpu; + void initFromFields(const Vector3< real_t > force) { gen::bulk::LbInitFromFields initialize{ gpuFields.pdfsId, gpuFields.rhoId, gpuFields.uId, force }; @@ -53,6 +75,8 @@ struct SimDomain { initialize(&b); } + + wait(); } void initConstant(const real_t rho, const Vector3< real_t > u, const Vector3< real_t > force) @@ -63,6 +87,8 @@ struct SimDomain { initialize(&b); } + + wait(); } gen::bulk::LbStreamCollide streamCollideSweep(const real_t omega, const Vector3< real_t > force) @@ -70,8 +96,20 @@ struct SimDomain return { gpuFields.pdfsId, gpuFields.rhoId, gpuFields.uId, force, omega }; } - void sync() { - /* TODO */ + void wait() { WALBERLA_GPU_CHECK(gpuDeviceSynchronize()); } + + void syncGhostLayers() + { + // WALBERLA_GPU_CHECK(gpuPeekAtLastError()); + commCpu(); + } + + void sync2cpu() + { + wait(); + gpu::fieldCpy< PdfField_T, CommonGpuField >(blocks, cpuFields.pdfsId, gpuFields.pdfsId); + gpu::fieldCpy< ScalarField_T, CommonGpuField >(blocks, cpuFields.rhoId, gpuFields.rhoId); + gpu::fieldCpy< VectorField_T, CommonGpuField >(blocks, cpuFields.uId, gpuFields.uId); } #else @@ -101,7 +139,11 @@ struct SimDomain return { cpuFields.pdfsId, cpuFields.rhoId, cpuFields.uId, force, omega }; } - void sync() { /* NOP */ } + void syncGhostLayers() { commCpu(); } + + void wait() { /* NOP */ } + + void sync2cpu() { /* NOP */ } #endif @@ -132,19 +174,33 @@ struct SimDomainBuilder blockforest::createUniformBlockGrid(blocks[0], blocks[1], blocks[2], cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2], 1.0, true, periodic[0], periodic[1], periodic[2]); - const BlockDataID pdfsId = field::addToStorage< PdfField_T >(sbfs, "f", real_c(0.0)); - const BlockDataID rhoId = field::addToStorage< ScalarField_T >(sbfs, "rho", real_c(0.0)); - const BlockDataID uId = field::addToStorage< VectorField_T >(sbfs, "u", real_c(0.0)); +#if defined(LBM_SCENARIOS_GPU_BUILD) + auto hostAlloc = make_shared< gpu::HostFieldAllocator< PdfField_T::value_type > >(); +#else + auto hostAlloc = make_shared< field::FieldAllocator< PdfField_T::value_type > >(); +#endif + + const BlockDataID pdfsId = field::addToStorage< PdfField_T >(sbfs, "f", real_c(0.0), field::fzyx, 1, hostAlloc); + const BlockDataID rhoId = field::addToStorage< ScalarField_T >(sbfs, "rho", real_c(0.0), field::fzyx, 1, hostAlloc); + const BlockDataID uId = field::addToStorage< VectorField_T >(sbfs, "u", real_c(0.0), field::fzyx, 1, hostAlloc); const BlockDataID flagFieldId = field::addFlagFieldToStorage< FlagField_T >(sbfs, "flagField"); + CpuCommScheme commCpu{ sbfs }; + auto pdfsPackInfo = std::make_shared< CpuPdfsPackInfo >(pdfsId); + commCpu.addPackInfo(pdfsPackInfo); + #if defined(LBM_SCENARIOS_GPU_BUILD) - static_assert(false, "TODO: Create GPU fields"); + const BlockDataID pdfsIdGpu = gpu::addGPUFieldToStorage< PdfField_T >(sbfs, pdfsId, "f_gpu"); + const BlockDataID rhoIdGpu = gpu::addGPUFieldToStorage< ScalarField_T >(sbfs, rhoId, "rho_gpu"); + const BlockDataID uIdGpu = gpu::addGPUFieldToStorage< VectorField_T >(sbfs, uId, "u_gpu"); + + // GpuCommScheme commGpu{ sbfs }; + // auto gpuPdfsPackInfo = std::make_shared< GpuPdfsPackInfo >(pdfsIdGpu); + // commGpu.addPackInfo(gpuPdfsPackInfo); + auto gpuPdfsPackInfo = std::make_shared< GpuPdfsPackInfo >(pdfsIdGpu); + commCpu.addPackInfo(gpuPdfsPackInfo); #endif - CommScheme comm{ sbfs }; - auto pdfsPackInfo = std::make_shared< PdfsPackInfo >(pdfsId); - comm.addPackInfo(pdfsPackInfo); - return { .blocks = sbfs, // @@ -154,7 +210,15 @@ struct SimDomainBuilder .uId = uId, .flagFieldId = flagFieldId }, // - .comm = comm + .commCpu = commCpu, // +#if defined(LBM_SCENARIOS_GPU_BUILD) + .gpuFields = { // + .pdfsId = pdfsIdGpu, + .rhoId = rhoIdGpu, + .uId = uIdGpu + }, + // .commGpu = commGpu +#endif }; } }; diff --git a/tests/BasicLbmScenarios/TestBasicLbmScenarios.cpp b/tests/BasicLbmScenarios/TestBasicLbmScenarios.cpp index d9f806997a1987cc6bb8fabd5a3af3fc930b5d1b..5c983746531c82a82fbf7114724e5938dc7c4a8f 100644 --- a/tests/BasicLbmScenarios/TestBasicLbmScenarios.cpp +++ b/tests/BasicLbmScenarios/TestBasicLbmScenarios.cpp @@ -32,13 +32,13 @@ void fullyPeriodic(Environment& env) auto streamCollide = dom.streamCollideSweep( 1.0, force ); for(uint_t t = 0; t < 10; ++t){ - dom.comm(); + dom.syncGhostLayers(); dom.forAllBlocks([&](IBlock & b){ streamCollide(&b); }); - dom.sync(); + dom.sync2cpu(); dom.forAllBlocks([&](auto & block){ const VectorField_T & velField = *block.template getData< VectorField_T >(dom.cpuFields.uId); @@ -129,7 +129,7 @@ void freeSlipPipe(Environment& env) for (uint_t i = 0; i < 10; ++i) { - dom.comm(); + dom.syncGhostLayers(); for (auto& block : *dom.blocks) { velOutput(); @@ -160,5 +160,7 @@ int main(int argc, char** argv) { walberla::Environment env{ argc, argv }; BasicLbmScenarios::fullyPeriodic(env); +#if !defined(LBM_SCENARIOS_GPU_BUILD) BasicLbmScenarios::freeSlipPipe(env); +#endif }