From 9549e956a26462ab305c51ed3735e5b740d0c0ad Mon Sep 17 00:00:00 2001 From: Markus Holzer <mholzer@ldmpe865h.onecert.fr> Date: Wed, 29 Jan 2025 16:14:44 +0100 Subject: [PATCH] [skip ci] update --- apps/showcases/PorousMediaGPU/CMakeLists.txt | 4 +- apps/showcases/PorousMediaGPU/Inflow.cpp | 143 +++++++++++++++ apps/showcases/PorousMediaGPU/Inflow.h | 98 ++++++++++ .../PorousMediaGPU/PorousMediaGPU.cpp | 170 +++-------------- .../PorousMediaGPU/PorousMediaGPU.prm | 18 +- apps/showcases/PorousMediaGPU/util.cpp | 92 ++++++++++ apps/showcases/PorousMediaGPU/util.h | 33 ++++ apps/showcases/PorousMediaGPU/utilGPU.cu | 171 ++++++++++++++++++ apps/showcases/PorousMediaGPU/utilGPU.h | 37 ++++ 9 files changed, 614 insertions(+), 152 deletions(-) create mode 100644 apps/showcases/PorousMediaGPU/Inflow.cpp create mode 100644 apps/showcases/PorousMediaGPU/Inflow.h create mode 100644 apps/showcases/PorousMediaGPU/util.cpp create mode 100644 apps/showcases/PorousMediaGPU/util.h create mode 100644 apps/showcases/PorousMediaGPU/utilGPU.cu create mode 100644 apps/showcases/PorousMediaGPU/utilGPU.h diff --git a/apps/showcases/PorousMediaGPU/CMakeLists.txt b/apps/showcases/PorousMediaGPU/CMakeLists.txt index 0d69de5d8..5de0f2ccf 100644 --- a/apps/showcases/PorousMediaGPU/CMakeLists.txt +++ b/apps/showcases/PorousMediaGPU/CMakeLists.txt @@ -18,11 +18,11 @@ waLBerla_generate_target_from_python(NAME PorousMediaGPUCodeGen if (WALBERLA_BUILD_WITH_CUDA OR WALBERLA_BUILD_WITH_HIP) waLBerla_add_executable ( NAME PorousMediaGPU - FILES PorousMediaGPU.cpp GridGeneration.h Setup.h Types.h InitializerFunctions.cpp TPMS.cpp + FILES PorousMediaGPU.cpp GridGeneration.h Setup.h Types.h InitializerFunctions.cpp TPMS.cpp Inflow.cpp util.cpp utilGPU.cu DEPENDS blockforest boundary core field gpu lbm_generated postprocessing stencil timeloop vtk PorousMediaGPUCodeGen ) else() waLBerla_add_executable ( NAME PorousMediaGPU - FILES PorousMediaGPU.cpp GridGeneration.h Setup.h Types.h InitializerFunctions.cpp TPMS.cpp + FILES PorousMediaGPU.cpp GridGeneration.h Setup.h Types.h InitializerFunctions.cpp TPMS.cpp Inflow.cpp util.cpp DEPENDS blockforest boundary core field lbm_generated postprocessing stencil timeloop vtk PorousMediaGPUCodeGen ) endif(WALBERLA_BUILD_WITH_CUDA OR WALBERLA_BUILD_WITH_HIP) diff --git a/apps/showcases/PorousMediaGPU/Inflow.cpp b/apps/showcases/PorousMediaGPU/Inflow.cpp new file mode 100644 index 000000000..f3b009b41 --- /dev/null +++ b/apps/showcases/PorousMediaGPU/Inflow.cpp @@ -0,0 +1,143 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file InitializerFunctions.h +//! \author Markus Holzer <markus.holzer@fau.de> +// +//====================================================================================================================== +#include "Inflow.h" + +namespace walberla{ + +void Inflow::logging(){ + WALBERLA_LOG_INFO_ON_ROOT( "Using turbulent injection with:" << std::setprecision(16) << + "\n + kin. viscosity: " << nu_ << " [m^2/s]" << + "\n + uRMS: " << urms_ << + "\n + characteristicLength: " << le_ << + "\n + numberOfFourrierModes: " << n_ << + "\n + alpha: " << alpha_ << + "\n + kappa VKP: " << kappaVKP_ << + "\n + epsilon VKP: " << epsilonVKP_ << + "\n + kappa Kolmogorov: " << kappaKolmogorov_ << + "\n + firstMode: " << firstMode_ << + "\n + lastMode: " << lastMode_) +} + +void Inflow::updateCPU(){ + + const real_t t = real_c(executionCounter_) * dt_; + for (auto& block : *blocks_){ + auto velocityFieldInflowSlice = block.template getData< Field_T >(ids_.velocityFieldInflowSlice); + const cell_idx_t zSize = cell_idx_c( velocityFieldInflowSlice->zSize() ); + const cell_idx_t ySize = cell_idx_c( velocityFieldInflowSlice->ySize() ); + const cell_idx_t gl = cell_idx_c( velocityFieldInflowSlice->nrOfGhostLayers() ); + + WALBERLA_ASSERT_EQUAL(blocks_->getLevel( block ), uint_c(0), "Inflow must be located on coarsest resolution") + + for( cell_idx_t z = 0; z < zSize; ++z ) { + for( cell_idx_t y = 0; y < ySize; ++y ){ + auto minCorner = block.getAABB().minCorner(); + + const real_t zCenter = (globalMinCorner_[2] + ( real_c( z ) + real_c(0.5) ) * dx_) + minCorner[2]; + const real_t yCenter = (globalMinCorner_[1] + ( real_c( y ) + real_c(0.5) ) * dx_) + minCorner[1]; + + Vector3<real_t> p(real_c(0.0), yCenter, zCenter); + + const Vector3<real_t> v = summOfFourrierModes(p, uc_, kappa_, ut_, t); + // velocityFieldInflowSlice->get(0, y, z, 0) = latticeVelocity_ + v[0]; + velocityFieldInflowSlice->get(0, y, z, 0) = v[0]; + velocityFieldInflowSlice->get(0, y, z, 1) = v[1]; + velocityFieldInflowSlice->get(0, y, z, 2) = v[2]; + } + } + } + gpu::fieldCpy< GPUField_T, Field_T >(blocks_, ids_.velocityFieldInflowSliceGPU, ids_.velocityFieldInflowSlice); + ++executionCounter_; +} + +void Inflow::updateGPU(){ + + const real_t t = real_c(executionCounter_) * dt_; + for (auto& block : *blocks_){ + + auto blockMinCorner = block.getAABB().min(); + + auto velocityGPU = block.template getData< GPUField_T >(ids_.velocityFieldInflowSliceGPU); + updateVelocityPlane(velocityGPU, kappaGPU_, utGPU_, globalMinCorner_, blockMinCorner, uc_, + dx_, t, n_, latticeVelocity_, block_, grid_); + + } + ++executionCounter_; +} + +void Inflow::init(){ + nu_ = setup_.kinViscosity; + urms_ = setup_.urms; + le_ = setup_.characteristicLength; + n_ = setup_.numberOfFourrierModes; + dt_ = setup_.dt; + dx_ = setup_.dxC; + uc_[0] = setup_.uc[0]; + uc_[1] = setup_.uc[1]; + uc_[2] = setup_.uc[2]; + + latticeVelocity_ = setup_.latticeVelocity; + + alpha_ = (real_c(55.0) * std::tgamma(real_c(5.0) / real_c(6.0))) / (real_c(9.0) * std::sqrt(math::pi) * std::tgamma(real_c(1.0) / real_c(3.0))); + kappaVKP_ = (real_c(9.0) * math::pi * alpha_) / (real_c(55.0) * le_); + // epsilonVKP_ = (std::pow(alpha_ * std::tgamma(real_c(2.0)/real_c(3.0)), (real_c(3.0)/real_c(2.0)))) * (real_c(0.5) * kappaVKP_ * std::pow(urms_, real_c(3.0))); + epsilonVKP_ = std::pow(urms_, real_c(3.0)) / le_; + kappaKolmogorov_ = std::pow(epsilonVKP_ / std::pow(nu_, real_c(3.0)), real_c(1.0)/real_c(4.0)); + + firstMode_ = std::log10(kappaVKP_ / real_c(1000.0)); + lastMode_ = std::log10(kappaKolmogorov_ * real_c(1000.0)); + const real_t linDeltaKappa = (lastMode_ - firstMode_) / (real_c(n_) - real_c(1.0)); + + kappa_.resize(n_); + ut_.resize(n_); + real_t utSquaredSum = 0.0; + for(uint_t i = 0; i < n_; ++i){ + const real_t kappaN = std::pow(real_c(10.0), firstMode_ + (real_c(i) * linDeltaKappa)); + const real_t kappaN1 = std::pow(real_c(10.0), firstMode_ + (real_c(i + 1) * linDeltaKappa)); + const real_t deltaKappa = kappaN1 - kappaN; + + kappa_[i] = kappaN; + const real_t tmp = std::sqrt(getKarmanPaoEnergy(kappaN, alpha_, kappaVKP_, kappaKolmogorov_, urms_) * deltaKappa); + ut_[i] = tmp; + utSquaredSum += tmp*tmp; + } + + const real_t correctionTKE = std::sqrt(((real_c(3.0) / real_c(2.0)) * (urms_*urms_)) / utSquaredSum); + + for(uint_t i = 0; i < n_; ++i){ + ut_[i] *= correctionTKE; + } + + globalMinCorner_ = blocks_->getDomain().min(); + + const Vector3<uint_t> s(n_, setup_.cellsPerBlock[1], setup_.cellsPerBlock[2]); + getBlockAndGridSize(s, block_, grid_); + + WALBERLA_LOG_DEVEL_VAR(block_) + WALBERLA_LOG_DEVEL_VAR(grid_) + + WALBERLA_GPU_CHECK(gpuMalloc((void **)&kappaGPU_, sizeof(real_t) * kappa_.size() )); + WALBERLA_GPU_CHECK(gpuMemcpy( kappaGPU_, &kappa_[0], sizeof(real_t) * kappa_.size(), gpuMemcpyHostToDevice )); + + WALBERLA_GPU_CHECK(gpuMalloc((void **)&utGPU_, sizeof(real_t) * ut_.size() )); + WALBERLA_GPU_CHECK(gpuMemcpy( utGPU_, &ut_[0], sizeof(real_t) * ut_.size(), gpuMemcpyHostToDevice )); +} + +} // namespace walberla diff --git a/apps/showcases/PorousMediaGPU/Inflow.h b/apps/showcases/PorousMediaGPU/Inflow.h new file mode 100644 index 000000000..6950214eb --- /dev/null +++ b/apps/showcases/PorousMediaGPU/Inflow.h @@ -0,0 +1,98 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file Inflow.h +//! \author Markus Holzer <markus.holzer@fau.de> +// +//====================================================================================================================== +#pragma once + +#include "core/logging/Initialization.h" +#include "core/math/Constants.h" + +#include "gpu/GPUWrapper.h" +#include "gpu/ErrorChecking.h" +#include "gpu/FieldCopy.h" + +// #include <curand_kernel.h> + +#include "Setup.h" +#include "util.h" +#include "utilGPU.h" + +namespace walberla{ + +class Inflow +{ + public: + Inflow(const std::shared_ptr< StructuredBlockForest >& blocks, const Setup& setup, const IDs& ids) + : blocks_(blocks), setup_(setup), ids_(ids), executionCounter_(uint_c(0)) + { + init(); + logging(); + } + + using Field_T = walberla::field::GhostLayerField<real_t, 3>; + using GPUField_T = gpu::GPUField< real_t >; + + ~Inflow(){ + WALBERLA_GPU_CHECK(gpuFree(kappaGPU_)) + WALBERLA_GPU_CHECK(gpuFree(utGPU_)) + } + + void init(); + void logging(); + + std::function<void()> updateCPUFunctor() {return [this](){ Inflow::updateCPU();};} + std::function<void()> updateGPUFunctor() {return [this](){ Inflow::updateGPU();};} + + void updateCPU(); + void updateGPU(); + + private: + const std::shared_ptr< StructuredBlockForest > blocks_; + const Setup setup_; + const IDs ids_; + + real_t nu_; + real_t urms_; + real_t le_; + uint_t n_; + real_t alpha_; + real_t kappaVKP_; + real_t epsilonVKP_; + real_t kappaKolmogorov_; + + real_t dt_; + real_t dx_; + Vector3<real_t> uc_; + real_t latticeVelocity_; + + real_t firstMode_; + real_t lastMode_; + std::vector<real_t> kappa_; + std::vector<real_t> ut_; + + uint_t executionCounter_; // number of times operator() has been called + + Vector3<real_t> globalMinCorner_; + + Vector3<uint32_t> block_; + Vector3<uint32_t> grid_; + real_t *kappaGPU_; + real_t *utGPU_; + +}; // class Inflow +} // namespace walberla diff --git a/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp b/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp index 90c6326aa..63354ab57 100644 --- a/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp +++ b/apps/showcases/PorousMediaGPU/PorousMediaGPU.cpp @@ -29,7 +29,6 @@ #include "core/logging/Logging.h" #include "core/math/Vector3.h" #include "core/math/Constants.h" -#include "core/math/Random.h" #include "core/Environment.h" #include "core/mpi/MPIManager.h" #include "core/MemoryUsage.h" @@ -80,6 +79,7 @@ #include <iostream> #include <memory> +#include "Inflow.h" #include "InitializerFunctions.h" #include "GridGeneration.h" #include "Setup.h" @@ -110,142 +110,6 @@ inline Vector3< uint_t > inflowSliceSize( const shared_ptr< StructuredBlockStora return Vector3<uint_t>( uint_c(1), blocks->getNumberOfYCells( *block ), blocks->getNumberOfZCells( *block ) ); } -template< typename Field_T > -class Inflow -{ - public: - Inflow(const std::shared_ptr< StructuredBlockForest >& blocks, const Setup& setup, const IDs& ids) - : blocks_(blocks), setup_(setup), ids_(ids), executionCounter_(uint_c(0)) - { - nu_ = setup_.kinViscosity; - urms_ = setup_.urms; - le_ = setup_.characteristicLength; - n_ = setup_.numberOfFourrierModes; - dt_ = setup_.dt; - uc_ = setup_.uc; - - alpha_ = (real_c(55.0) * std::tgamma(real_c(5.0) / real_c(6.0))) / (real_c(9.0) * std::sqrt(math::pi) * std::tgamma(real_c(1.0) / real_c(3.0))); - kappaVKP_ = (real_c(9.0) * math::pi * alpha_) / (real_c(55.0) * le_); - epsilonVKP_ = (std::pow(alpha_ * std::tgamma(real_c(2.0)/real_c(3.0)), (real_c(3.0)/real_c(2.0)))) * (real_c(0.5) * kappaVKP_ * std::pow(urms_, real_c(3.0))); - kappaKolmogorov_ = std::pow(epsilonVKP_ / std::pow(nu_, real_c(3.0)), real_c(1.0)/real_c(4.0)); - - s_ = std::log10(kappaVKP_ / real_c(2.0)); - e_ = std::log10(kappaKolmogorov_ * real_c(2.0)); - firstMode_ = std::pow(real_c(10.0), s_); - lastMode_ = std::pow(real_c(10.0), e_); - deltaKappa_ = (lastMode_ - firstMode_) / (real_c(n_) - real_c(1.0)); - - logging(); - } - - void operator()(){ - ++executionCounter_; - - // only evaluate in given intervals - // if (executionCounter_ % interval_ != uint_c(0)) { return; } - - // update the inflow velocity plane - update(); - } - - void logging(){ - WALBERLA_LOG_INFO_ON_ROOT( "Using turbulent injection with:" << std::setprecision(16) << - "\n + kin. viscosity: " << nu_ << " [m^2/s]" << - "\n + uRMS: " << urms_ << - "\n + characteristicLength: " << le_ << - "\n + numberOfFourrierModes: " << n_ << - "\n + alpha: " << alpha_ << - "\n + kappa VKP: " << kappaVKP_ << - "\n + epsilon VKP: " << epsilonVKP_ << - "\n + kappa Kolmogorov: " << kappaKolmogorov_ << - "\n + firstMode: " << firstMode_ << - "\n + lastMode: " << lastMode_) - } - - private: - const std::shared_ptr< StructuredBlockForest > blocks_; - const Setup setup_; - const IDs ids_; - - real_t nu_; - real_t urms_; - real_t le_; - uint_t n_; - real_t alpha_; - real_t kappaVKP_; - real_t epsilonVKP_; - real_t kappaKolmogorov_; - - real_t dt_; - Vector3<real_t> uc_; - - real_t s_; - real_t e_; - real_t firstMode_; - real_t lastMode_; - real_t deltaKappa_; - - uint_t executionCounter_; // number of times operator() has been called - - void update(){ - WALBERLA_LOG_INFO_ON_ROOT("Update inflow") - for (auto& block : *blocks_){ - auto velocityFieldInflowSlice = block.template getData< Field_T >(ids_.velocityFieldInflowSlice); - WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(velocityFieldInflowSlice, Cell globalCell; - velocityFieldInflowSlice->get(x, y, z, 0) = 0.05; - ) - } - gpu::fieldCpy< gpu::GPUField< typename Field_T::value_type >, Field_T >(blocks_, ids_.velocityFieldInflowSliceGPU, ids_.velocityFieldInflowSlice); - - } - - real_t getKarmanPaoEnergy(const real_t kappa){ - const real_t a1 = alpha_ * std::pow(urms_, real_c(2.0)) / kappaVKP_; - const real_t a2 = std::pow(kappa / kappaVKP_, real_c(4.0)); - const real_t a3 = std::pow(real_c(1.0) + std::pow(kappa / kappaVKP_, real_c(2.0)), real_c(17.0) / real_c(6.0)); - const real_t a4 = std::exp(real_c(-2.0) * std::pow(kappa / kappaKolmogorov_, real_c(2.0))); - return a1 * a2 / a3 * a4; - } - - Vector3<real_t> summOfFourrierModes(Vector3<real_t> p){ - - Vector3<real_t> result(real_c(0.0), real_c(0.0), real_c(0.0)); - - for(uint_t i = 0; i < n_; ++i){ - - const real_t a = math::realRandom(real_c(0.0), real_c(1.0)); - const real_t theta = std::acos(real_c(2.0) * a - real_c(1.0)); - const real_t phi = real_c(2.0) * math::pi * math::realRandom(real_c(0.0), real_c(1.0)); - const real_t psi = real_c(2.0) * math::pi * math::realRandom(real_c(0.0), real_c(1.0)); - - const real_t kappaN = firstMode_ + real_c(0.5) * deltaKappa_ + real_c(i) * deltaKappa_; - - Vector3<real_t> kappaVector(std::cos(phi) * std::sin(theta) * kappaN, - std::sin(theta) * std::sin(phi) * kappaN, - std::cos(theta) * kappaN); - - Vector3<real_t> zetta(math::realRandom(real_c(-1.0), real_c(1.0)), - math::realRandom(real_c(-1.0), real_c(1.0)), - math::realRandom(real_c(-1.0), real_c(1.0))); - zetta = zetta.getNormalized(); - - Vector3<real_t> crossProduct = kappaVector%=zetta; - Vector3<real_t> sigma = crossProduct / crossProduct.getNormalized(); - - const real_t ut = std::sqrt(getKarmanPaoEnergy(kappaN) * deltaKappa_); - const real_t t = executionCounter_ * dt_; - - const Vector3<real_t> x = p - t*uc_; - const real_t arg = std::cos(kappaVector * x + psi); - - result += real_c(2.0) * ut * arg * sigma; - } - - return result; - } - -}; // class Inflow - int main(int argc, char** argv){ Environment env( argc, argv ); mpi::MPIManager::instance()->useWorldComm(); @@ -317,7 +181,7 @@ int main(int argc, char** argv){ ids.densityFieldGPU = gpu::addGPUFieldToStorage< ScalarField_T >(blocks, ids.densityField, "velocity on GPU", true); std::function< Vector3< uint_t > (const shared_ptr< StructuredBlockStorage >&, IBlock * const) > getInflowSliceSize = inflowSliceSize; - ids.velocityFieldInflowSlice = field::addToStorage< VelocityField_T >(blocks, "velocity Inflow", getInflowSliceSize, macroFieldType(0.0), field::fzyx, uint_c(1)); + ids.velocityFieldInflowSlice = field::addToStorage< VelocityField_T >(blocks, "velocity Inflow", getInflowSliceSize, macroFieldType(0.0), field::fzyx, uint_c(0)); ids.velocityFieldInflowSliceGPU = gpu::addGPUFieldToStorage< VelocityField_T >(blocks, ids.velocityFieldInflowSlice, "velocity Inflow on GPU", true); const Cell innerOuterSplit = Cell(parameters.getParameter< Vector3<cell_idx_t> >("innerOuterSplit", Vector3<cell_idx_t>(1, 1, 1))); @@ -329,7 +193,31 @@ int main(int argc, char** argv){ } sweepCollection.initialiseBlockPointer(); - Inflow<VelocityField_T> inflow(blocks, setup, ids); + auto inflow = std::make_shared<Inflow>(blocks, setup, ids); + + const bool cpu = false; + if(cpu){ + WALBERLA_LOG_INFO_ON_ROOT("CPU Version") + inflow->updateCPU(); + } + else{ + WALBERLA_LOG_INFO_ON_ROOT("GPU Version") + inflow->updateGPU(); + gpu::fieldCpy< VelocityField_T, gpu::GPUField< VelocityField_T::value_type > >(blocks, ids.velocityFieldInflowSlice, ids.velocityFieldInflowSliceGPU); + } + + + for (auto& block : *blocks){ + auto velocityFieldInflowSlice = block.getData< VelocityField_T >(ids.velocityFieldInflowSlice); + + WALBERLA_LOG_DEVEL_VAR(velocityFieldInflowSlice->get(0,20,20,0)) + WALBERLA_LOG_DEVEL_VAR(velocityFieldInflowSlice->get(0,20,20,1)) + WALBERLA_LOG_DEVEL_VAR(velocityFieldInflowSlice->get(0,20,20,2)) + } + + return EXIT_SUCCESS; + + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// LB SWEEPS AND BOUNDARY HANDLING /// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -350,7 +238,7 @@ int main(int argc, char** argv){ initGrid(blocks, ids.flagField, setup); } - tpms.setupBoundary(blocks, ids.flagField); + // tpms.setupBoundary(blocks, ids.flagField); geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, ids.flagField, setup.fluidUID); // BoundaryCollection_T boundaryCollection(blocks, ids.flagField, ids.pdfFieldGPU, fluidFlagUID, latticeVelocity); BoundaryCollection_T boundaryCollection(blocks, ids.flagField, ids.pdfFieldGPU, setup.fluidUID, ids.velocityFieldInflowSliceGPU, ids.pdfField); @@ -378,7 +266,7 @@ int main(int argc, char** argv){ auto defaultStream = gpu::StreamRAII::newPriorityStream(streamLowPriority); SweepTimeloop timeloop(blocks->getBlockStorage(), setup.timesteps); - timeloop.addFuncBeforeTimeStep(inflow, "Update inflow velocity"); + timeloop.addFuncBeforeTimeStep(inflow->updateGPUFunctor(), "Update inflow velocity"); lbm_generated::BasicRecursiveTimeStepGPU< GPUPdfField_T, SweepCollection_T, BoundaryCollection_T > LBMMeshRefinement(blocks, ids.pdfFieldGPU, sweepCollection, boundaryCollection, nonUniformCommunication, nonUniformPackInfo); diff --git a/apps/showcases/PorousMediaGPU/PorousMediaGPU.prm b/apps/showcases/PorousMediaGPU/PorousMediaGPU.prm index 877f46b3d..f3243cf9f 100644 --- a/apps/showcases/PorousMediaGPU/PorousMediaGPU.prm +++ b/apps/showcases/PorousMediaGPU/PorousMediaGPU.prm @@ -7,7 +7,7 @@ Parameters latticeVelocity 0.05; initialiseWithInletVelocity true; - timesteps 11; + timesteps 1; processMemoryLimit 512; // MiB innerOuterSplit <1, 1, 1>; @@ -19,10 +19,10 @@ Parameters Turbulence { useGrid false; - U_rms 0.1; - characteristicLength 0.3333333333333333; - numberOfFourrierModes 5; - U_c < 1.0, 0.0, 0.0 >; + U_rms 0.05; + characteristicLength 0.4; + numberOfFourrierModes 100; + U_c < 0.05, 0.0, 0.0 >; } TPMS @@ -40,7 +40,7 @@ DomainSetup { domainSize < 7, 1, 1 >; // cellsPerBlock < 1344, 192, 192 >; - cellsPerBlock < 168, 24, 24 >; + cellsPerBlock < 336, 48, 48 >; blocks < 1, 1, 1 >; periodic < false, true, true >; refinementLevels 0; @@ -65,10 +65,10 @@ StabilityChecker VTKWriter { - vtkWriteFrequency 5; + vtkWriteFrequency 0; vtkGhostLayers 0; velocity true; - writeVelocityAsMagnitude true; + writeVelocityAsMagnitude false; density false; flag true; writeOnlySlice false; @@ -86,7 +86,7 @@ Logging writeSetupForestAndReturn false; // When only one process is used the decomposition is writen and the program terminates readSetupFromFile false; remainingTimeLoggerFrequency 20; // in seconds - writeSurfaceMeshOfTPMS true; + writeSurfaceMeshOfTPMS false; } Evaluation diff --git a/apps/showcases/PorousMediaGPU/util.cpp b/apps/showcases/PorousMediaGPU/util.cpp new file mode 100644 index 000000000..90d79836a --- /dev/null +++ b/apps/showcases/PorousMediaGPU/util.cpp @@ -0,0 +1,92 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file util.cpp +//! \author Markus Holzer <markus.holzer@fau.de> +// +//====================================================================================================================== +#include "util.h" + +namespace walberla +{ + +Vector3<real_t> summOfFourrierModes(const Vector3<real_t>& p, const Vector3<real_t>& uc, + const std::vector<real_t>& kappa, const std::vector<real_t>& ut, const real_t t){ + + Vector3<real_t> result(real_c(0.0), real_c(0.0), real_c(0.0)); + const real_t pi2 = real_c(2.0) * math::pi; + const uint_t n = uint_c(kappa.size()); + + for(uint_t i = 0; i < n; ++i){ + const real_t a = 0.1; // math::realRandom(real_c(0.0), real_c(1.0)); + const real_t phi = 0.2; //math::realRandom(real_c(0.0), real_c(1.0)) * pi2; + const real_t psi = 0.3; //math::realRandom(real_c(0.0), real_c(1.0)) * pi2; + const real_t zetaX = -0.4; //math::realRandom(real_c(-1.0), real_c(1.0)); + const real_t zetaY = -0.6; //math::realRandom(real_c(-1.0), real_c(1.0)); + const real_t zetaZ = 0.8; // math::realRandom(real_c(-1.0), real_c(1.0)); + + const real_t theta = std::acos(real_c(2.0) * a - real_c(1.0)); + + const real_t kappaN = kappa[i]; + const real_t kappaX = std::cos(phi) * std::sin(theta) * kappaN; + const real_t kappaY = std::sin(theta) * std::sin(phi) * kappaN; + const real_t kappaZ = std::cos(theta) * kappaN; + // WALBERLA_LOG_INFO_ON_ROOT("Kappa: [" << kappaX << ", " << kappaY << ", " << kappaZ << "]") + + const real_t zettaLength = std::sqrt(zetaX*zetaX + zetaY*zetaY + zetaZ*zetaZ); + // WALBERLA_LOG_DEVEL_VAR(zettaLength) + + const real_t zetaXNormalized = zetaX / zettaLength; + const real_t zetaYNormalized = zetaY / zettaLength; + const real_t zetaZNormalized = zetaZ / zettaLength; + + // WALBERLA_LOG_INFO_ON_ROOT("zeta: [" << zetaXNormalized << ", " << zetaYNormalized << ", " << zetaZNormalized << "]") + + + const real_t sigmaX = kappaY * zetaZNormalized - kappaZ * zetaYNormalized; + const real_t sigmaY = kappaZ * zetaXNormalized - kappaX * zetaZNormalized; + const real_t sigmaZ = kappaX * zetaYNormalized - kappaY * zetaXNormalized; + + const real_t sigmaLength = std::sqrt(sigmaX*sigmaX + sigmaY*sigmaY + sigmaZ*sigmaZ); + // WALBERLA_LOG_DEVEL_VAR(sigmaLength) + + const real_t sigmaXNormalized = sigmaX / sigmaLength; + const real_t sigmaYNormalized = sigmaY / sigmaLength; + const real_t sigmaZNormalized = sigmaZ / sigmaLength; + + // WALBERLA_LOG_INFO_ON_ROOT("Sigma: [" << sigmaXNormalized << ", " << sigmaYNormalized << ", " << sigmaZNormalized << "]") + + const real_t pX = p[0] - (t * uc[0]); + const real_t pY = p[1] - (t * uc[1]); + const real_t pZ = p[2] - (t * uc[2]); + + const real_t arg = real_c(2.0) * ut[i] * std::cos(kappaX * pX + kappaY * pY + kappaZ * pZ + psi); + + result[0] += arg * sigmaXNormalized; + result[1] += arg * sigmaYNormalized; + result[2] += arg * sigmaZNormalized; + } + return result; +} + +real_t getKarmanPaoEnergy(const real_t k, const real_t alpha, const real_t kappaVKP, const real_t kappaKolmogorov, const real_t uRMS){ + const real_t a1 = alpha * std::pow(uRMS, real_c(2.0)) / kappaVKP; + const real_t a2 = std::pow(k / kappaVKP, real_c(4.0)); + const real_t a3 = std::pow(real_c(1.0) + std::pow(k / kappaVKP, real_c(2.0)), real_c(17.0) / real_c(6.0)); + const real_t a4 = std::exp(real_c(-2.0) * std::pow(k / kappaKolmogorov, real_c(2.0))); + return a1 * a2 / a3 * a4; +} + +} // namespace walberla \ No newline at end of file diff --git a/apps/showcases/PorousMediaGPU/util.h b/apps/showcases/PorousMediaGPU/util.h new file mode 100644 index 000000000..55750c8e4 --- /dev/null +++ b/apps/showcases/PorousMediaGPU/util.h @@ -0,0 +1,33 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file util.h +//! \author Markus Holzer <markus.holzer@fau.de> +// +//====================================================================================================================== +#pragma once + +#include "core/DataTypes.h" +#include "core/math/Constants.h" +#include "core/math/Vector3.h" +#include "core/math/Random.h" + +namespace walberla{ +Vector3<real_t> summOfFourrierModes(const Vector3<real_t>& p, const Vector3<real_t>& uc, + const std::vector<real_t>& kappa, const std::vector<real_t>& ut, const real_t t); + +real_t getKarmanPaoEnergy(const real_t k, const real_t alpha, const real_t kappaVKP, const real_t kappaKolmogorov, const real_t uRMS); + +} // namespace walberla \ No newline at end of file diff --git a/apps/showcases/PorousMediaGPU/utilGPU.cu b/apps/showcases/PorousMediaGPU/utilGPU.cu new file mode 100644 index 000000000..7cb5912b7 --- /dev/null +++ b/apps/showcases/PorousMediaGPU/utilGPU.cu @@ -0,0 +1,171 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file util.cu +//! \author Markus Holzer <markus.holzer@fau.de> +// +//====================================================================================================================== +#include "utilGPU.h" + +namespace walberla +{ + +namespace internal{ + +static __device__ void singleMode(double* rX, double* rY, double* rZ, const double kappaN, const double ut, + const double pX, const double pY, const double pZ) +{ + + /* + curandState state; + curand_init(42, xStride*x + yStride*y + zStride*z, 0, &state); + + const real_t a = curand_uniform(&state); + const real_t phi = curand_uniform(&state) * pi2; + const real_t psi = curand_uniform(&state) * pi2; + const real_t zetaX = real_t(2.0) * curand_uniform(&state) - real_t(1.0); + const real_t zetaY = real_t(2.0) * curand_uniform(&state) - real_t(1.0); + const real_t zetaZ = real_t(2.0) * curand_uniform(&state) - real_t(1.0); + */ + + const real_t a = 0.1; // math::realRandom(real_c(0.0), real_c(1.0)); + const real_t phi = 0.2; //math::realRandom(real_c(0.0), real_c(1.0)) * pi2; + const real_t psi = 0.3; //math::realRandom(real_c(0.0), real_c(1.0)) * pi2; + const real_t zetaX = -0.4; //math::realRandom(real_c(-1.0), real_c(1.0)); + const real_t zetaY = -0.6; //math::realRandom(real_c(-1.0), real_c(1.0)); + const real_t zetaZ = 0.8; // math::realRandom(real_c(-1.0), real_c(1.0)); + + const real_t theta = acos(real_t(2.0) * a - real_t(1.0)); + + const real_t kappaX = cos(phi) * sin(theta) * kappaN; + const real_t kappaY = sin(theta) * sin(phi) * kappaN; + const real_t kappaZ = cos(theta) * kappaN; + + const real_t zettaLength = sqrt(zetaX*zetaX + zetaY*zetaY + zetaZ*zetaZ); + + const real_t zetaXNormalized = zetaX / zettaLength; + const real_t zetaYNormalized = zetaY / zettaLength; + const real_t zetaZNormalized = zetaZ / zettaLength; + + const real_t sigmaX = kappaY * zetaZNormalized - kappaZ * zetaYNormalized; + const real_t sigmaY = kappaZ * zetaXNormalized - kappaX * zetaZNormalized; + const real_t sigmaZ = kappaX * zetaYNormalized - kappaY * zetaXNormalized; + + const real_t sigmaLength = sqrt(sigmaX*sigmaX + sigmaY*sigmaY + sigmaZ*sigmaZ); + + const real_t sigmaXNormalized = sigmaX / sigmaLength; + const real_t sigmaYNormalized = sigmaY / sigmaLength; + const real_t sigmaZNormalized = sigmaZ / sigmaLength; + + const real_t arg = real_t(2.0) * ut * cos(kappaX * pX + kappaY * pY + kappaZ * pZ + psi); + + *rX = arg * sigmaXNormalized; + *rY = arg * sigmaYNormalized; + *rZ = arg * sigmaZNormalized; + +} + + +static __global__ void summOfFourrierModesKernel(double * velocity, double * const kappa, double * const ut, + const real_t yMin, const real_t zMin, const real_t minCornerY, const real_t minCornerZ, + const real_t dx, const real_t ucX, const real_t ucY, const real_t ucZ, + const real_t t, const uint_t n, const real_t latticeVelocity, + const int64_t xSize, const int64_t ySize, const int64_t zSize, + const int64_t xStride, const int64_t yStride, const int64_t zStride, const int64_t fStride) +{ + if(blockDim.x*blockIdx.x + threadIdx.x < xSize && blockDim.y*blockIdx.y + threadIdx.y < ySize && blockDim.z*blockIdx.z + threadIdx.z < zSize) + { + const int64_t x = 0; // blockDim.x*blockIdx.x + threadIdx.x; + const int64_t y = blockDim.y*blockIdx.y + threadIdx.y; + const int64_t z = blockDim.z*blockIdx.z + threadIdx.z; + + + real_t resultX = 0.0; + real_t resultY = 0.0; + real_t resultZ = 0.0; + + const real_t xCenter = real_t(0.0); + const real_t yCenter = (yMin + ( real_t( y ) + real_t(0.5) ) * dx) + minCornerY; + const real_t zCenter = (zMin + ( real_t( z ) + real_t(0.5) ) * dx) + minCornerZ; + const real_t pi2 = real_t(2.0) * real_t(3.14159265358979323846); + + const real_t pX = xCenter - (t * ucX); + const real_t pY = yCenter - (t * ucY); + const real_t pZ = zCenter - (t * ucZ); + + for(int64_t i = 0; i < n; ++i){ + + double rX, rY, rZ; + singleMode(&rX, &rY, &rZ, kappa[i], ut[i], pX, pY, pZ); + + resultX += rX; + resultY += rY; + resultZ += rZ; + } + + + velocity[xStride*x + yStride*y + zStride*z + 0*fStride] = resultX; + velocity[xStride*x + yStride*y + zStride*z + 1*fStride] = resultY; + velocity[xStride*x + yStride*y + zStride*z + 2*fStride] = resultZ; + + } + +} +} + + +void getBlockAndGridSize(Vector3<uint_t> s, Vector3<uint32_t>& block, Vector3<uint32_t>& grid){ + + const int64_t b0 = 128; + const int64_t b1 = 1; + const int64_t b2 = 1; + + const int64_t x0 = int64_c(s[0]); + const int64_t x1 = int64_c(s[1]); + const int64_t x2 = int64_c(s[2]); + + block[0] = uint32_c((1024 < ((b0 < x0) ? b0 : x0)) ? 1024 : ((b0 < x0) ? b0 : x0)); + block[1] = uint32_c((1024 < ((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0))))) ? 1024 : ((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0))))); + block[2] = uint32_c((64 < ((x2 < b2*((int64_t)(b0*b1) / (int64_t)(((b0 < x0) ? b0 : x0)*((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0))))))) ? x2 : b2*((int64_t)(b0*b1) / (int64_t)(((b0 < x0) ? b0 : x0)*((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))))))) ? 64 : ((x2 < b2*((int64_t)(b0*b1) / (int64_t)(((b0 < x0) ? b0 : x0)*((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0))))))) ? x2 : b2*((int64_t)(b0*b1) / (int64_t)(((b0 < x0) ? b0 : x0)*((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))))))); + grid[0] = uint32_c((x0) % (((1024 < ((b0 < x0) ? b0 : x0)) ? 1024 : ((b0 < x0) ? b0 : x0))) == 0 ? (int64_t)(x0) / (int64_t)(((1024 < ((b0 < x0) ? b0 : x0)) ? 1024 : ((b0 < x0) ? b0 : x0))) : ( (int64_t)(x0) / (int64_t)(((1024 < ((b0 < x0) ? b0 : x0)) ? 1024 : ((b0 < x0) ? b0 : x0))) ) +1 ); + grid[1] = uint32_c((x1) % (((1024 < ((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0))))) ? 1024 : ((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))))) == 0 ? (int64_t)(x1) / (int64_t)(((1024 < ((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0))))) ? 1024 : ((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))))) : ( (int64_t)(x1) / (int64_t)(((1024 < ((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0))))) ? 1024 : ((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))))) ) +1 ); + grid[2] = uint32_c((x2) % (((64 < ((x2 < b2*((int64_t)(b0*b1) / (int64_t)(((b0 < x0) ? b0 : x0)*((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0))))))) ? x2 : b2*((int64_t)(b0*b1) / (int64_t)(((b0 < x0) ? b0 : x0)*((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))))))) ? 64 : ((x2 < b2*((int64_t)(b0*b1) / (int64_t)(((b0 < x0) ? b0 : x0)*((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0))))))) ? x2 : b2*((int64_t)(b0*b1) / (int64_t)(((b0 < x0) ? b0 : x0)*((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0))))))))) == 0 ? (int64_t)(x2) / (int64_t)(((64 < ((x2 < b2*((int64_t)(b0*b1) / (int64_t)(((b0 < x0) ? b0 : x0)*((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0))))))) ? x2 : b2*((int64_t)(b0*b1) / (int64_t)(((b0 < x0) ? b0 : x0)*((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))))))) ? 64 : ((x2 < b2*((int64_t)(b0*b1) / (int64_t)(((b0 < x0) ? b0 : x0)*((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0))))))) ? x2 : b2*((int64_t)(b0*b1) / (int64_t)(((b0 < x0) ? b0 : x0)*((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0))))))))) : ( (int64_t)(x2) / (int64_t)(((64 < ((x2 < b2*((int64_t)(b0*b1) / (int64_t)(((b0 < x0) ? b0 : x0)*((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0))))))) ? x2 : b2*((int64_t)(b0*b1) / (int64_t)(((b0 < x0) ? b0 : x0)*((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))))))) ? 64 : ((x2 < b2*((int64_t)(b0*b1) / (int64_t)(((b0 < x0) ? b0 : x0)*((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0))))))) ? x2 : b2*((int64_t)(b0*b1) / (int64_t)(((b0 < x0) ? b0 : x0)*((x1 < b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0)))) ? x1 : b1*((int64_t)(b0) / (int64_t)(((b0 < x0) ? b0 : x0))))))))) ) +1 ); +} + +void updateVelocityPlane(gpu::GPUField<real_t>* velocityGPU, real_t* kappa, real_t* ut, const Vector3<real_t>& globalMinCorner, const Vector3<real_t>& blockMinCorner, const Vector3<real_t>& uc, + const real_t dx, const real_t t, const uint_t n, const real_t latticeVelocity, + const Vector3<uint32_t>& block, const Vector3<uint32_t>& grid){ + + dim3 block_(block[0], block[1], block[2]); + dim3 grid_(grid[0], grid[1], grid[2]); + + double * data_velocity = velocityGPU->dataAt(0, 0, 0, 0); + const int64_t size_velocity_0 = int64_c(velocityGPU->xSize()); + const int64_t size_velocity_1 = int64_c(velocityGPU->ySize()); + const int64_t size_velocity_2 = int64_c(velocityGPU->zSize()); + const int64_t stride_velocity_0 = int64_c(velocityGPU->xStride()); + const int64_t stride_velocity_1 = int64_c(velocityGPU->yStride()); + const int64_t stride_velocity_2 = int64_c(velocityGPU->zStride()); + const int64_t stride_velocity_3 = int64_c(velocityGPU->fStride()); + internal::summOfFourrierModesKernel<<<grid_, block_, 0, nullptr>>>(data_velocity, kappa, ut, + globalMinCorner[1], globalMinCorner[2], blockMinCorner[1], blockMinCorner[2], + dx, uc[0], uc[1], uc[2], t, n, latticeVelocity, + size_velocity_0, size_velocity_1, size_velocity_2, + stride_velocity_0, stride_velocity_1, stride_velocity_2, stride_velocity_3); + +} + + +} // namespace walberla \ No newline at end of file diff --git a/apps/showcases/PorousMediaGPU/utilGPU.h b/apps/showcases/PorousMediaGPU/utilGPU.h new file mode 100644 index 000000000..2e48ae449 --- /dev/null +++ b/apps/showcases/PorousMediaGPU/utilGPU.h @@ -0,0 +1,37 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file utilGPU.h +//! \author Markus Holzer <markus.holzer@fau.de> +// +//====================================================================================================================== +#pragma once + +#include "core/DataTypes.h" +#include "core/math/Constants.h" + +#include "gpu/GPUWrapper.h" +#include "gpu/ErrorChecking.h" +#include "gpu/FieldCopy.h" +#include "gpu/GPUField.h" + +namespace walberla{ +void getBlockAndGridSize(Vector3<uint_t> s, Vector3<uint32_t>& block, Vector3<uint32_t>& grid); + +void updateVelocityPlane(gpu::GPUField<real_t>* velocityGPU, real_t* kappa, real_t* ut, const Vector3<real_t>& globalMinCorner, const Vector3<real_t>& blockMinCorner, const Vector3<real_t>& uc, + const real_t dx, const real_t t, const uint_t n, const real_t latticeVelocity, + const Vector3<uint32_t>& block, const Vector3<uint32_t>& grid); + +} // namespace walberla \ No newline at end of file -- GitLab