diff --git a/apps/showcases/CMakeLists.txt b/apps/showcases/CMakeLists.txt index bbc87187124556b5e7e18591b4e3b8c4f4880f41..6f14223a16c0634db9e84e9e4ed946aa04fde4e2 100644 --- a/apps/showcases/CMakeLists.txt +++ b/apps/showcases/CMakeLists.txt @@ -13,6 +13,7 @@ if ( WALBERLA_BUILD_WITH_CODEGEN) add_subdirectory( FlowAroundSphere ) add_subdirectory( FlowAroundCylinder ) add_subdirectory( Channel ) + add_subdirectory( TaylorGreenVortex ) if (WALBERLA_BUILD_WITH_PYTHON) add_subdirectory( PhaseFieldAllenCahn ) diff --git a/apps/showcases/FlowAroundSphere/FlowAroundSphere.cpp b/apps/showcases/FlowAroundSphere/FlowAroundSphere.cpp index 2df81727fc7cc8bbaee915f4b7222d99b9a4544a..6b09aa83e5fc7c86cdf882f205532b8ff8ef8fb9 100644 --- a/apps/showcases/FlowAroundSphere/FlowAroundSphere.cpp +++ b/apps/showcases/FlowAroundSphere/FlowAroundSphere.cpp @@ -424,6 +424,9 @@ int main(int argc, char** argv) memoryPerCell, processMemoryLimit, true ); sforest->saveToFile( sbffile.c_str() ); + const uint_t nBlocks = sforest->getNumberOfBlocks(); + const uint_t nCells = nBlocks * (cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2]); + const memory_t totalMemory = memory_t(nCells) * memoryPerCell; WALBERLA_LOG_INFO_ON_ROOT( "Benchmark run data:" "\n- simulation parameters:" @@ -434,6 +437,11 @@ int main(int argc, char** argv) "\n + mesh levels: " << refinementLevels + uint_c(1) << "\n + resolution: " << coarseMeshSize << " - on the coarsest grid)" << "\n + resolution: " << fineMeshSize << " - on the finest grid)" << + "\n- domain properties: " + "\n + # blocks: " << nBlocks << + "\n + # cells: " << uint_c(real_c(nCells) / real_c(1e6)) << " [1e6]" + "\n + # cells sphere D: " << diameterSphere / fineMeshSize << + "\n + total memory: " << totalMemory / 1e9 << " [GB]" << "\n- simulation properties: " "\n + sphere pos.(x): " << setup.sphereXPosition * setup.dx << " [m]" << "\n + sphere pos.(y): " << setup.sphereYPosition * setup.dx << " [m]" << @@ -577,7 +585,7 @@ int main(int argc, char** argv) WALBERLA_GPU_CHECK(gpuDeviceSynchronize()) WALBERLA_GPU_CHECK(gpuPeekAtLastError()) #else - BoundaryCollection_T boundaryCollection(blocks, flagFieldID, pdfFieldID, fluidFlagUID, latticeVelocity); + BoundaryCollection_T boundaryCollection(blocks, flagFieldID, pdfFieldID, fluidFlagUID, omegaFinestLevel, latticeVelocity, wallDistanceFunctor); #endif WALBERLA_MPI_BARRIER() WALBERLA_LOG_INFO_ON_ROOT("BOUNDARY HANDLING done") diff --git a/apps/showcases/TaylorGreenVortex/CMakeLists.txt b/apps/showcases/TaylorGreenVortex/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..bcb27287bc4de5c38dc6e58103c3fef7aef3120f --- /dev/null +++ b/apps/showcases/TaylorGreenVortex/CMakeLists.txt @@ -0,0 +1,15 @@ +waLBerla_link_files_to_builddir( "*.prm" ) +waLBerla_link_files_to_builddir( "*.py" ) + +waLBerla_generate_target_from_python(NAME TaylorGreenVortexGenerated + FILE TaylorGreenVortex.py + OUT_FILES TaylorGreenVortexStorageSpecification.h TaylorGreenVortexStorageSpecification.cpp + TaylorGreenVortexSweepCollection.h TaylorGreenVortexSweepCollection.cpp + NoSlip.h NoSlip.cpp + UBB.h UBB.cpp + TaylorGreenVortexBoundaryCollection.h + TaylorGreenVortexHeader.h) + +waLBerla_add_executable ( NAME TaylorGreenVortex + FILES TaylorGreenVortex.cpp + DEPENDS TaylorGreenVortexGenerated blockforest core field geometry lbm_generated timeloop ) diff --git a/apps/showcases/TaylorGreenVortex/TaylorGreenVortex.cpp b/apps/showcases/TaylorGreenVortex/TaylorGreenVortex.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1afd51159be02a62a96b1c0ae0a863df20ae776a --- /dev/null +++ b/apps/showcases/TaylorGreenVortex/TaylorGreenVortex.cpp @@ -0,0 +1,1065 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file TaylorGreenVortex.cpp +//! \author Markus Holzer <markus.holzer@fau.de> +//! \brief TaylorGreenVortex +// +//====================================================================================================================== +#include "blockforest/Initialization.h" +#include "blockforest/SetupBlockForest.h" +#include "blockforest/communication/UniformBufferedScheme.h" +#include "blockforest/loadbalancing/StaticCurve.h" + +#include "core/DataTypes.h" +#include "core/Environment.h" +#include "core/MemoryUsage.h" +#include "core/SharedFunctor.h" +#include "core/debug/TestSubsystem.h" +#include "core/logging/Initialization.h" +#include "core/math/Vector3.h" +#include "core/timing/RemainingTimeLogger.h" + +#include "field/AddToStorage.h" +#include "field/FlagField.h" +#include "field/GhostLayerField.h" +#include "field/vtk/VTKWriter.h" + +#include "geometry/InitBoundaryHandling.h" + +#include "timeloop/SweepTimeloop.h" + +#include "lbm_generated/communication/UniformGeneratedPdfPackInfo.h" +#include "lbm_generated/evaluation/PerformanceEvaluation.h" +#include "lbm_generated/field/AddToStorage.h" +#include "lbm_generated/field/PdfField.h" + +// include the generated header file. It includes all generated classes +#include "TaylorGreenVortexHeader.h" + +using namespace walberla; +using namespace std::placeholders; + +using StorageSpecification_T = lbm::TaylorGreenVortexStorageSpecification; +using Stencil_T = StorageSpecification_T::Stencil; +using CommunicationStencil_T = StorageSpecification_T::CommunicationStencil; +using PdfField_T = lbm_generated::PdfField< StorageSpecification_T >; +using PackInfo_T = lbm_generated::UniformGeneratedPdfPackInfo< PdfField_T >; + +using SweepCollection_T = lbm::TaylorGreenVortexSweepCollection; + +using VectorField_T = GhostLayerField< real_t, StorageSpecification_T::Stencil::D >; +using ScalarField_T = GhostLayerField< real_t, 1 >; + +using flag_t = walberla::uint8_t; +using FlagField_T = FlagField< flag_t >; +using BoundaryCollection_T = lbm::TaylorGreenVortexBoundaryCollection< FlagField_T >; + +using blockforest::communication::UniformBufferedScheme; + +struct IDs +{ + BlockDataID pdfField; + BlockDataID pdfFieldTmp; + BlockDataID velocityField; + BlockDataID densityField; + BlockDataID avgVelField; + BlockDataID avgVelSqrField; + BlockDataID avgPressureField; + BlockDataID flagField; + + BlockDataID pdfFieldGPU; + BlockDataID velocityFieldGPU; + BlockDataID densityFieldGPU; +}; + +struct Setup +{ + Vector3< uint_t > blocks; + Vector3< uint_t > cellsPerBlock; + Vector3< uint_t > cells; + Vector3< bool > periodic; + uint_t numGhostLayers; +}; + +class AccuracyEvaluation +{ + public: + AccuracyEvaluation(std::shared_ptr< StructuredBlockForest >& blocks, const IDs& ids, + SweepCollection_T& sweepCollection, const real_t maxLatticeVelocity, const uint_t logFrequency, + const bool logToStream, const bool logToFile) + : blocks_(blocks), ids_(ids), sweepCollection_(sweepCollection), maxLatticeVelocity_(maxLatticeVelocity), + executionCounter_(uint_c(0)), filename_("TaylorGreenVortex.txt"), normalizationFactor_(real_c(1.0)) + { + logFrequency_ = logToStream ? logFrequency : uint_c(0); + plotFrequency_ = logToFile ? logFrequency : uint_c(0); + } + + void setNormalizationFactor(const real_t f) { normalizationFactor_ = f; } + void setFilename(const std::string& filename) { filename_ = filename; } + + real_t L1() const { return L1_; } + real_t L2() const { return L2_; } + real_t Lmax() const { return Lmax_; } + + void operator()() + { + if (logFrequency_ == uint_c(0) && (plotFrequency_ == uint_c(0) || filename_.empty())) return; + + ++executionCounter_; + + const bool plot = (plotFrequency_ != uint_c(0) && (executionCounter_ - uint_c(1)) % plotFrequency_ == uint_c(0) && + !filename_.empty()); + const bool log = (logFrequency_ != uint_c(0) && (executionCounter_ - uint_c(1)) % logFrequency_ == uint_c(0)); + + if (!log && !plot) return; + + const auto& domainAABB = blocks_->getDomain(); + + real_t _L1(real_c(0.0)); + real_t _L2(real_c(0.0)); + real_t _Lmax(real_c(0.0)); + + for (auto block = blocks_->begin(); block != blocks_->end(); ++block) + { + sweepCollection_.calculateMacroscopicParameters(block.get()); + const VectorField_T* velocityField = block->template getData< const VectorField_T >(ids_.velocityField); + + const auto level = blocks_->getLevel(*block); + const real_t volumeFraction = + (blocks_->dx(level) * blocks_->dy(level) * blocks_->dz(level)) / domainAABB.volume(); + + for (auto it = velocityField->beginXYZ(); it != velocityField->end(); ++it) + { + Vector3< real_t > center = blocks_->getBlockLocalCellCenter(*block, Cell(it.x(), it.y(), it.z())); + + Vector3< real_t > exactVelocity( + real_c(maxLatticeVelocity_ * (center[1] - real_c(domainAABB.yMin())) / real_c(domainAABB.ySize())), + real_c(0.0), real_c(0.0)); + Vector3< real_t > velocity(it.getF(0), it.getF(1), it.getF(2)); + + const auto error = velocity - exactVelocity; + const real_t diff = error.length(); + + _L1 += diff * volumeFraction; + _L2 += diff * diff * volumeFraction; + _Lmax = std::max(_Lmax, diff); + } + } + + mpi::reduceInplace(_L1, mpi::SUM); + mpi::reduceInplace(_L2, mpi::SUM); + mpi::reduceInplace(_Lmax, mpi::MAX); + _L2 = std::sqrt(_L2); + + L1_ = _L1; + L2_ = _L2; + Lmax_ = _Lmax; + + WALBERLA_ROOT_SECTION() + { + if (plot && executionCounter_ == uint_t(1)) + { + std::ofstream file(filename_.c_str()); + file << "# accuracy evaluation" + << "# step [1], L1 [2], L2 [3], Lmax [4]" << std::endl; + file.close(); + } + + if (log) + { + WALBERLA_LOG_INFO("Evaluation of accuracy:" + << "\n - L1: " << L1_ << "\n - L2: " << L2_ << "\n - Lmax: " << Lmax_); + } + + if (plot) + { + std::ofstream file(filename_.c_str(), std::ofstream::out | std::ofstream::app); + file << (executionCounter_ - uint_t(1)) << " " << L1_ << " " << L2_ << " " << Lmax_ << std::endl; + file.close(); + } + } + }; + + private: + std::shared_ptr< StructuredBlockForest > blocks_; + IDs ids_; + SweepCollection_T& sweepCollection_; + real_t maxLatticeVelocity_; + + uint_t executionCounter_; + + uint_t plotFrequency_; + uint_t logFrequency_; + + std::string filename_; + + real_t normalizationFactor_; + + real_t L1_; + real_t L2_; + real_t Lmax_; + +}; // class AccuracyEvaluation + +namespace +{ +void workloadMemoryAndSUIDAssignment(SetupBlockForest& forest, const memory_t memoryPerBlock) +{ + for (auto block = forest.begin(); block != forest.end(); ++block) + { + block->setWorkload(numeric_cast< workload_t >(uint_t(1) << block->getLevel())); + block->setMemory(memoryPerBlock); + } +} + +shared_ptr< SetupBlockForest >createSetupBlockForest(const Setup& setup, uint_t numberOfProcesses, const memory_t memoryPerCell) +{ + shared_ptr< SetupBlockForest > forest = make_shared< SetupBlockForest >(); + + const memory_t memoryPerBlock = + numeric_cast< memory_t >((setup.cellsPerBlock[0] + uint_t(2) * setup.numGhostLayers) * + (setup.cellsPerBlock[1] + uint_t(2) * setup.numGhostLayers) * + (setup.cellsPerBlock[2] + uint_t(2) * setup.numGhostLayers)) * + memoryPerCell; + + forest->addWorkloadMemorySUIDAssignmentFunction( + std::bind(workloadMemoryAndSUIDAssignment, std::placeholders::_1, memoryPerBlock)); + + forest->init( + AABB(real_c(0), real_c(0), real_c(0), real_c(setup.cells[0]), real_c(setup.cells[1]), real_c(setup.cells[2])), + setup.blocks[0], setup.blocks[1], setup.blocks[2], setup.periodic[0], setup.periodic[1], setup.periodic[2]); + + MPIManager::instance()->useWorldComm(); + forest->balanceLoad(blockforest::StaticLevelwiseCurveBalanceWeighted(), numberOfProcesses); + // WALBERLA_LOG_INFO_ON_ROOT("SetupBlockForest created successfully:\n" << *forest); + + return forest; +} + +shared_ptr< blockforest::StructuredBlockForest >createStructuredBlockForest(const Setup& setup, const memory_t memoryPerCell) +{ + // WALBERLA_LOG_INFO_ON_ROOT("Creating the block structure ..."); + shared_ptr< SetupBlockForest > sforest = createSetupBlockForest(setup, uint_c(MPIManager::instance()->numProcesses()), memoryPerCell); + + auto bf = std::make_shared< blockforest::BlockForest >(uint_c(MPIManager::instance()->rank()), *sforest, false); + auto sbf = std::make_shared< blockforest::StructuredBlockForest >(bf, setup.cellsPerBlock[0], setup.cellsPerBlock[1], + setup.cellsPerBlock[2]); + sbf->createCellBoundingBoxes(); + + return sbf; +} +} // namespace + +class StreamCollide +{ + public: + StreamCollide(const shared_ptr< StructuredBlockForest > & blocks, const IDs& ids, const real_t omega) : ids_(ids) + { + + std::map<int, int> m{{-1, 7}, {0, 0}, {1, 0}}; + _nBlocks = blocks->getNumberOfBlocks(); + blocks->getBlocks(blockVector, 0); + for (uint_t level = 0; level < blocks->getNumberOfLevels(); level++) + { + const double level_scale_factor = double(uint_t(1) << level); + const double one = double(1.0); + const double half = double(0.5); + + omegaVector.push_back( double(omega / (level_scale_factor * (-omega * half + one) + omega * half)) ); + } + + for( auto it = blocks->begin(); it != blocks->end(); ++it ) + { + auto* local = dynamic_cast< Block* >(it.get()); + for (cell_idx_t ctr_2 = -1; ctr_2 < 2; ++ctr_2) + { + for (cell_idx_t ctr_1 = -1; ctr_1 < 2; ++ctr_1) + { + for (cell_idx_t ctr_0 = -1; ctr_0 < 2; ++ctr_0) + { + auto dir = stencil::vectorToDirection(ctr_0, ctr_1, ctr_2); + if(dir == stencil::C) + { + PdfField_T* localPdfField = local->getData< PdfField_T >(ids_.pdfField); + PdfField_T* localPdfFieldTmp = local->getData< PdfField_T >(ids_.pdfFieldTmp); + + pdfs.push_back(localPdfField->dataAt(0, 0, 0, 0)); + pdfsTmp.push_back(localPdfFieldTmp->dataAt(0, 0, 0, 0)); + + _interval = localPdfField->xyzSize(); + _stride_pdfs_0 = int64_t(localPdfField->xStride()); + _stride_pdfs_1 = int64_t(localPdfField->yStride()); + _stride_pdfs_2 = int64_t(localPdfField->zStride()); + _stride_pdfs_3 = int64_t(localPdfField->fStride()); + } + else + { + const auto neighborIdx = blockforest::getBlockNeighborhoodSectionIndex(dir); + if (!(local->neighborhoodSectionHasEquallySizedBlock(neighborIdx))) continue; + + const BlockID& neighborId = local->getNeighborId(neighborIdx, uint_t(0)); + auto neighbor = dynamic_cast< Block* >(blocks->getBlock(neighborId)); + + PdfField_T* localPdfField = neighbor->getData< PdfField_T >(ids_.pdfField); + PdfField_T* localPdfFieldTmp = neighbor->getData< PdfField_T >(ids_.pdfFieldTmp); + + pdfs.push_back(localPdfField->dataAt(m[stencil::cx[dir]], m[stencil::cy[dir]], m[stencil::cz[dir]], 0)); + pdfsTmp.push_back(localPdfFieldTmp->dataAt(m[stencil::cx[dir]], m[stencil::cy[dir]], m[stencil::cz[dir]], 0)); + } + } + } + } + } + } + + void streamCollide(Block * block) + { + auto pdfField = block->getData<PdfField_T>(ids_.pdfField); + auto pdfFieldTmp = block->getData<PdfField_T>(ids_.pdfFieldTmp); + CellInterval ci = pdfField->xyzSize(); + + const uint_t level = block->getLevel(); + double omega = omegaVector[level]; + + const double xi_0 = ((1.0) / (omega*-0.25 + 2.0)); + const double rr_0 = xi_0*(omega*-2.0 + 4.0); + + for (auto it: ci) + { + const cell_idx_t ctr_2 = it.z(); + const cell_idx_t ctr_1 = it.y(); + const cell_idx_t ctr_0 = it.x(); + const double f_0 = pdfField->get(ctr_0, ctr_1, ctr_2, 0); + const double f_1 = pdfField->get(ctr_0, ctr_1 - 1, ctr_2, 1); + const double f_2 = pdfField->get(ctr_0, ctr_1 + 1, ctr_2, 2); + const double f_3 = pdfField->get(ctr_0 + 1, ctr_1, ctr_2, 3); + const double f_4 = pdfField->get(ctr_0 - 1, ctr_1, ctr_2, 4); + const double f_5 = pdfField->get(ctr_0, ctr_1, ctr_2 - 1, 5); + const double f_6 = pdfField->get(ctr_0, ctr_1, ctr_2 + 1, 6); + const double f_7 = pdfField->get(ctr_0 + 1, ctr_1 - 1, ctr_2, 7); + const double f_8 = pdfField->get(ctr_0 - 1, ctr_1 - 1, ctr_2, 8); + const double f_9 = pdfField->get(ctr_0 + 1, ctr_1 + 1, ctr_2, 9); + const double f_10 = pdfField->get(ctr_0 - 1, ctr_1 + 1, ctr_2, 10); + const double f_11 = pdfField->get(ctr_0, ctr_1 - 1, ctr_2 - 1, 11); + const double f_12 = pdfField->get(ctr_0, ctr_1 + 1, ctr_2 - 1, 12); + const double f_13 = pdfField->get(ctr_0 + 1, ctr_1, ctr_2 - 1, 13); + const double f_14 = pdfField->get(ctr_0 - 1, ctr_1, ctr_2 - 1, 14); + const double f_15 = pdfField->get(ctr_0, ctr_1 - 1, ctr_2 + 1, 15); + const double f_16 = pdfField->get(ctr_0, ctr_1 + 1, ctr_2 + 1, 16); + const double f_17 = pdfField->get(ctr_0 + 1, ctr_1, ctr_2 + 1, 17); + const double f_18 = pdfField->get(ctr_0 - 1, ctr_1, ctr_2 + 1, 18); + const double f_19 = pdfField->get(ctr_0 - 1, ctr_1 - 1, ctr_2 - 1, 19); + const double f_20 = pdfField->get(ctr_0 + 1, ctr_1 - 1, ctr_2 - 1, 20); + const double f_21 = pdfField->get(ctr_0 - 1, ctr_1 + 1, ctr_2 - 1, 21); + const double f_22 = pdfField->get(ctr_0 + 1, ctr_1 + 1, ctr_2 - 1, 22); + const double f_23 = pdfField->get(ctr_0 - 1, ctr_1 - 1, ctr_2 + 1, 23); + const double f_24 = pdfField->get(ctr_0 + 1, ctr_1 - 1, ctr_2 + 1, 24); + const double f_25 = pdfField->get(ctr_0 - 1, ctr_1 + 1, ctr_2 + 1, 25); + const double f_26 = pdfField->get(ctr_0 + 1, ctr_1 + 1, ctr_2 + 1, 26); + + const double vel0Term = f_10 + f_14 + f_18 + f_19 + f_21 + f_23 + f_25 + f_4 + f_8; + const double vel1Term = f_1 + f_11 + f_15 + f_20 + f_24 + f_7; + const double vel2Term = f_12 + f_13 + f_22 + f_5; + const double delta_rho = f_0 + f_16 + f_17 + f_2 + f_26 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term; + const double rho = delta_rho + 1.0; + const double xi_1 = ((1.0) / (rho)); + const double u_0 = xi_1*(-f_13 - f_17 - f_20 - f_22 - f_24 - f_26 - f_3 - f_7 - f_9 + vel0Term); + const double u_1 = xi_1*(-f_10 - f_12 - f_16 + f_19 - f_2 - f_21 - f_22 + f_23 - f_25 - f_26 + f_8 - f_9 + vel1Term); + const double u_2 = xi_1*(f_11 + f_14 - f_15 - f_16 - f_17 - f_18 + f_19 + f_20 + f_21 - f_23 - f_24 - f_25 - f_26 - f_6 + vel2Term); + const double u0Mu1 = u_0 - u_1; + const double u0Pu1 = u_0 + u_1; + const double u1Pu2 = u_1 + u_2; + const double u1Mu2 = u_1 - u_2; + const double u0Mu2 = u_0 - u_2; + const double u0Pu2 = u_0 + u_2; + const double f_eq_common = delta_rho + rho*-1.5*(u_0*u_0) + rho*-1.5*(u_1*u_1) + rho*-1.5*(u_2*u_2); + + pdfFieldTmp->get(ctr_0, ctr_1, ctr_2, 0) = f_0 + omega*(-f_0 + f_eq_common*0.29629629629629628); + pdfFieldTmp->get(ctr_0, ctr_1, ctr_2, 1) = f_1 + omega*(f_1*-0.5 + f_2*-0.5 + f_eq_common*0.07407407407407407 + rho*0.33333333333333331*(u_1*u_1)) + rr_0*(f_1*-0.5 + f_2*0.5 + rho*u_1*0.22222222222222221); + pdfFieldTmp->get(ctr_0, ctr_1, ctr_2, 2) = f_2 + omega*(f_1*-0.5 + f_2*-0.5 + f_eq_common*0.07407407407407407 + rho*0.33333333333333331*(u_1*u_1)) + rr_0*(f_1*0.5 + f_2*-0.5 + rho*u_1*-0.22222222222222221); + pdfFieldTmp->get(ctr_0, ctr_1, ctr_2, 3) = f_3 + omega*(f_3*-0.5 + f_4*-0.5 + f_eq_common*0.07407407407407407 + rho*0.33333333333333331*(u_0*u_0)) + rr_0*(f_3*-0.5 + f_4*0.5 + rho*u_0*-0.22222222222222221); + pdfFieldTmp->get(ctr_0, ctr_1, ctr_2, 4) = f_4 + omega*(f_3*-0.5 + f_4*-0.5 + f_eq_common*0.07407407407407407 + rho*0.33333333333333331*(u_0*u_0)) + rr_0*(f_3*0.5 + f_4*-0.5 + rho*u_0*0.22222222222222221); + pdfFieldTmp->get(ctr_0, ctr_1, ctr_2, 5) = f_5 + omega*(f_5*-0.5 + f_6*-0.5 + f_eq_common*0.07407407407407407 + rho*0.33333333333333331*(u_2*u_2)) + rr_0*(f_5*-0.5 + f_6*0.5 + rho*u_2*0.22222222222222221); + pdfFieldTmp->get(ctr_0, ctr_1, ctr_2, 6) = f_6 + omega*(f_5*-0.5 + f_6*-0.5 + f_eq_common*0.07407407407407407 + rho*0.33333333333333331*(u_2*u_2)) + rr_0*(f_5*0.5 + f_6*-0.5 + rho*u_2*-0.22222222222222221); + pdfFieldTmp->get(ctr_0, ctr_1, ctr_2, 7) = f_7 + omega*(f_10*-0.5 + f_7*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u0Mu1*u0Mu1)) + rr_0*(f_10*0.5 + f_7*-0.5 + rho*u0Mu1*-0.055555555555555552); + pdfFieldTmp->get(ctr_0, ctr_1, ctr_2, 8) = f_8 + omega*(f_8*-0.5 + f_9*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u0Pu1*u0Pu1)) + rr_0*(f_8*-0.5 + f_9*0.5 + rho*u0Pu1*0.055555555555555552); + pdfFieldTmp->get(ctr_0, ctr_1, ctr_2, 9) = f_9 + omega*(f_8*-0.5 + f_9*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u0Pu1*u0Pu1)) + rr_0*(f_8*0.5 + f_9*-0.5 + rho*u0Pu1*-0.055555555555555552); + pdfFieldTmp->get(ctr_0, ctr_1, ctr_2, 10) = f_10 + omega*(f_10*-0.5 + f_7*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u0Mu1*u0Mu1)) + rr_0*(f_10*-0.5 + f_7*0.5 + rho*u0Mu1*0.055555555555555552); + pdfFieldTmp->get(ctr_0, ctr_1, ctr_2, 11) = f_11 + omega*(f_11*-0.5 + f_16*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u1Pu2*u1Pu2)) + rr_0*(f_11*-0.5 + f_16*0.5 + rho*u1Pu2*0.055555555555555552); + pdfFieldTmp->get(ctr_0, ctr_1, ctr_2, 12) = f_12 + omega*(f_12*-0.5 + f_15*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u1Mu2*u1Mu2)) + rr_0*(f_12*-0.5 + f_15*0.5 + rho*u1Mu2*-0.055555555555555552); + pdfFieldTmp->get(ctr_0, ctr_1, ctr_2, 13) = f_13 + omega*(f_13*-0.5 + f_18*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u0Mu2*u0Mu2)) + rr_0*(f_13*-0.5 + f_18*0.5 + rho*u0Mu2*-0.055555555555555552); + pdfFieldTmp->get(ctr_0, ctr_1, ctr_2, 14) = f_14 + omega*(f_14*-0.5 + f_17*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u0Pu2*u0Pu2)) + rr_0*(f_14*-0.5 + f_17*0.5 + rho*u0Pu2*0.055555555555555552); + pdfFieldTmp->get(ctr_0, ctr_1, ctr_2, 15) = f_15 + omega*(f_12*-0.5 + f_15*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u1Mu2*u1Mu2)) + rr_0*(f_12*0.5 + f_15*-0.5 + rho*u1Mu2*0.055555555555555552); + pdfFieldTmp->get(ctr_0, ctr_1, ctr_2, 16) = f_16 + omega*(f_11*-0.5 + f_16*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u1Pu2*u1Pu2)) + rr_0*(f_11*0.5 + f_16*-0.5 + rho*u1Pu2*-0.055555555555555552); + pdfFieldTmp->get(ctr_0, ctr_1, ctr_2, 17) = f_17 + omega*(f_14*-0.5 + f_17*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u0Pu2*u0Pu2)) + rr_0*(f_14*0.5 + f_17*-0.5 + rho*u0Pu2*-0.055555555555555552); + pdfFieldTmp->get(ctr_0, ctr_1, ctr_2, 18) = f_18 + omega*(f_13*-0.5 + f_18*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u0Mu2*u0Mu2)) + rr_0*(f_13*0.5 + f_18*-0.5 + rho*u0Mu2*0.055555555555555552); + pdfFieldTmp->get(ctr_0, ctr_1, ctr_2, 19) = f_19 + omega*(delta_rho*-0.013888888888888888 + f_19*-0.5 + f_26*-0.5 + f_eq_common*0.018518518518518517 + rho*(0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2))) + rr_0*(f_19*-0.5 + f_26*0.5 + rho*(u0Pu1*0.013888888888888888 + u_2*0.013888888888888888)); + pdfFieldTmp->get(ctr_0, ctr_1, ctr_2, 20) = f_20 + omega*(delta_rho*-0.013888888888888888 + f_20*-0.5 + f_25*-0.5 + f_eq_common*0.018518518518518517 + rho*(0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2))) + rr_0*(f_20*-0.5 + f_25*0.5 + rho*(u0Mu1*-0.013888888888888888 + u_2*0.013888888888888888)); + pdfFieldTmp->get(ctr_0, ctr_1, ctr_2, 21) = f_21 + omega*(delta_rho*-0.013888888888888888 + f_21*-0.5 + f_24*-0.5 + f_eq_common*0.018518518518518517 + rho*(0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2))) + rr_0*(f_21*-0.5 + f_24*0.5 + rho*(u0Mu1*0.013888888888888888 + u_2*0.013888888888888888)); + pdfFieldTmp->get(ctr_0, ctr_1, ctr_2, 22) = f_22 + omega*(delta_rho*-0.013888888888888888 + f_22*-0.5 + f_23*-0.5 + f_eq_common*0.018518518518518517 + rho*(0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2))) + rr_0*(f_22*-0.5 + f_23*0.5 + rho*(u0Pu1*-0.013888888888888888 + u_2*0.013888888888888888)); + pdfFieldTmp->get(ctr_0, ctr_1, ctr_2, 23) = f_23 + omega*(delta_rho*-0.013888888888888888 + f_22*-0.5 + f_23*-0.5 + f_eq_common*0.018518518518518517 + rho*(0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2))) + rr_0*(f_22*0.5 + f_23*-0.5 + rho*(u0Pu1*0.013888888888888888 + u_2*-0.013888888888888888)); + pdfFieldTmp->get(ctr_0, ctr_1, ctr_2, 24) = f_24 + omega*(delta_rho*-0.013888888888888888 + f_21*-0.5 + f_24*-0.5 + f_eq_common*0.018518518518518517 + rho*(0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2))) + rr_0*(f_21*0.5 + f_24*-0.5 + rho*(u0Mu1*-0.013888888888888888 + u_2*-0.013888888888888888)); + pdfFieldTmp->get(ctr_0, ctr_1, ctr_2, 25) = f_25 + omega*(delta_rho*-0.013888888888888888 + f_20*-0.5 + f_25*-0.5 + f_eq_common*0.018518518518518517 + rho*(0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2))) + rr_0*(f_20*0.5 + f_25*-0.5 + rho*(u0Mu1*0.013888888888888888 + u_2*-0.013888888888888888)); + pdfFieldTmp->get(ctr_0, ctr_1, ctr_2, 26) = f_26 + omega*(delta_rho*-0.013888888888888888 + f_19*-0.5 + f_26*-0.5 + f_eq_common*0.018518518518518517 + rho*(0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2))) + rr_0*(f_19*0.5 + f_26*-0.5 + rho*(u0Pu1*-0.013888888888888888 + u_2*-0.013888888888888888)); + } + pdfField->swapDataPointers(pdfFieldTmp); + } + + + void streamCollideEven() + { + double omega = omegaVector[0]; + const double xi_0 = ((1.0) / (omega*-0.25 + 2.0)); + const double rr_0 = xi_0*(omega*-2.0 + 4.0); + + // std::map<int, int> m{{-1, 0}, {0, 0}, {1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 0}}; + std::map<int, int> m{{-1, 0}, {0, 0}, {1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 0}}; + + + for(uint_t i = 0; i < _nBlocks; ++i) + { + for (auto it: _interval) + { + const cell_idx_t ctr_2 = it.z(); + const cell_idx_t ctr_1 = it.y(); + const cell_idx_t ctr_0 = it.x(); + + const uint_t index_C = uint_c(27) * i + uint_c( (0 + 1) * 9 + (0 + 1) * 3 + 0 + 1 ); + const uint_t index_N = uint_c(27) * i + uint_c( (0 + 1) * 9 + ((ctr_1 -1 < 0 ? -1 : 0) + 1) * 3 + 0 + 1 ); + const uint_t index_S = uint_c(27) * i + uint_c( (0 + 1) * 9 + ((ctr_1 +1 > 7 ? 1 : 0) + 1) * 3 + 0 + 1 ); + const uint_t index_W = uint_c(27) * i + uint_c( (0 + 1) * 9 + (0 + 1) * 3 + (ctr_0 +1 > 7 ? 1 : 0) + 1 ); + const uint_t index_E = uint_c(27) * i + uint_c( (0 + 1) * 9 + (0 + 1) * 3 + (ctr_0 -1 < 0 ? -1 : 0) + 1 ); + const uint_t index_T = uint_c(27) * i + uint_c( ((ctr_2 -1 < 0 ? -1 : 0) + 1) * 9 + (0 + 1) * 3 + 0 + 1 ); + const uint_t index_B = uint_c(27) * i + uint_c( ((ctr_2 +1 > 7 ? 1 : 0) + 1) * 9 + (0 + 1) * 3 + 0 + 1 ); + const uint_t index_NW = uint_c(27) * i + uint_c( (0 + 1) * 9 + ((ctr_1 -1 < 0 ? -1 : 0) + 1) * 3 + (ctr_0 +1 > 7 ? 1 : 0) + 1 ); + const uint_t index_NE = uint_c(27) * i + uint_c( (0 + 1) * 9 + ((ctr_1 -1 < 0 ? -1 : 0) + 1) * 3 + (ctr_0 -1 < 0 ? -1 : 0) + 1 ); + const uint_t index_SW = uint_c(27) * i + uint_c( (0 + 1) * 9 + ((ctr_1 +1 > 7 ? 1 : 0) + 1) * 3 + (ctr_0 +1 > 7 ? 1 : 0) + 1 ); + const uint_t index_SE = uint_c(27) * i + uint_c( (0 + 1) * 9 + ((ctr_1 +1 > 7 ? 1 : 0) + 1) * 3 + (ctr_0 -1 < 0 ? -1 : 0) + 1 ); + const uint_t index_TN = uint_c(27) * i + uint_c( ((ctr_2 -1 < 0 ? -1 : 0) + 1) * 9 + ((ctr_1 -1 < 0 ? -1 : 0) + 1) * 3 + 0 + 1 ); + const uint_t index_TS = uint_c(27) * i + uint_c( ((ctr_2 -1 < 0 ? -1 : 0) + 1) * 9 + ((ctr_1 +1 > 7 ? 1 : 0) + 1) * 3 + 0 + 1 ); + const uint_t index_TW = uint_c(27) * i + uint_c( ((ctr_2 -1 < 0 ? -1 : 0) + 1) * 9 + (0 + 1) * 3 + (ctr_0 +1 > 7 ? 1 : 0) + 1 ); + const uint_t index_TE = uint_c(27) * i + uint_c( ((ctr_2 -1 < 0 ? -1 : 0) + 1) * 9 + (0 + 1) * 3 + (ctr_0 -1 < 0 ? -1 : 0) + 1 ); + const uint_t index_BN = uint_c(27) * i + uint_c( ((ctr_2 +1 > 7 ? 1 : 0) + 1) * 9 + ((ctr_1 -1 < 0 ? -1 : 0) + 1) * 3 + 0 + 1 ); + const uint_t index_BS = uint_c(27) * i + uint_c( ((ctr_2 +1 > 7 ? 1 : 0) + 1) * 9 + ((ctr_1 +1 > 7 ? 1 : 0) + 1) * 3 + 0 + 1 ); + const uint_t index_BW = uint_c(27) * i + uint_c( ((ctr_2 +1 > 7 ? 1 : 0) + 1) * 9 + (0 + 1) * 3 + (ctr_0 +1 > 7 ? 1 : 0) + 1 ); + const uint_t index_BE = uint_c(27) * i + uint_c( ((ctr_2 +1 > 7 ? 1 : 0) + 1) * 9 + (0 + 1) * 3 + (ctr_0 -1 < 0 ? -1 : 0) + 1 ); + const uint_t index_TNE = uint_c(27) * i + uint_c( ((ctr_2 -1 < 0 ? -1 : 0) + 1) * 9 + ((ctr_1 -1 < 0 ? -1 : 0) + 1) * 3 + (ctr_0 -1 < 0 ? -1 : 0) + 1 ); + const uint_t index_TNW = uint_c(27) * i + uint_c( ((ctr_2 -1 < 0 ? -1 : 0) + 1) * 9 + ((ctr_1 -1 < 0 ? -1 : 0) + 1) * 3 + (ctr_0 +1 > 7 ? 1 : 0) + 1 ); + const uint_t index_TSE = uint_c(27) * i + uint_c( ((ctr_2 -1 < 0 ? -1 : 0) + 1) * 9 + ((ctr_1 +1 > 7 ? 1 : 0) + 1) * 3 + (ctr_0 -1 < 0 ? -1 : 0) + 1 ); + const uint_t index_TSW = uint_c(27) * i + uint_c( ((ctr_2 -1 < 0 ? -1 : 0) + 1) * 9 + ((ctr_1 +1 > 7 ? 1 : 0) + 1) * 3 + (ctr_0 +1 > 7 ? 1 : 0) + 1 ); + const uint_t index_BNE = uint_c(27) * i + uint_c( ((ctr_2 +1 > 7 ? 1 : 0) + 1) * 9 + ((ctr_1 -1 < 0 ? -1 : 0) + 1) * 3 + (ctr_0 -1 < 0 ? -1 : 0) + 1 ); + const uint_t index_BNW = uint_c(27) * i + uint_c( ((ctr_2 +1 > 7 ? 1 : 0) + 1) * 9 + ((ctr_1 -1 < 0 ? -1 : 0) + 1) * 3 + (ctr_0 +1 > 7 ? 1 : 0) + 1 ); + const uint_t index_BSE = uint_c(27) * i + uint_c( ((ctr_2 +1 > 7 ? 1 : 0) + 1) * 9 + ((ctr_1 +1 > 7 ? 1 : 0) + 1) * 3 + (ctr_0 -1 < 0 ? -1 : 0) + 1 ); + const uint_t index_BSW = uint_c(27) * i + uint_c( ((ctr_2 +1 > 7 ? 1 : 0) + 1) * 9 + ((ctr_1 +1 > 7 ? 1 : 0) + 1) * 3 + (ctr_0 +1 > 7 ? 1 : 0) + 1 ); +// +// WALBERLA_LOG_DEVEL_VAR(ctr_1) +// WALBERLA_LOG_DEVEL_VAR(((ctr_1 -1 < 0 ? -1 : 0) + 1)) +// WALBERLA_LOG_DEVEL_VAR(index_N) + + const double f_0 = pdfs[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 0*_stride_pdfs_3]; + const double f_1 = pdfs[index_N][ctr_0*_stride_pdfs_0 + m[ctr_1 - 1]*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 1*_stride_pdfs_3]; + const double f_2 = pdfs[index_S][ctr_0*_stride_pdfs_0 + m[ctr_1 + 1]*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 2*_stride_pdfs_3]; + const double f_3 = pdfs[index_W][m[ctr_0 + 1]*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 3*_stride_pdfs_3]; + const double f_4 = pdfs[index_E][m[ctr_0 - 1]*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 4*_stride_pdfs_3]; + const double f_5 = pdfs[index_T][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + m[ctr_2 - 1]*_stride_pdfs_2 + 5*_stride_pdfs_3]; + const double f_6 = pdfs[index_B][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + m[ctr_2 + 1]*_stride_pdfs_2 + 6*_stride_pdfs_3]; + const double f_7 = pdfs[index_NW][m[ctr_0 + 1]*_stride_pdfs_0 + m[ctr_1 - 1]*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 7*_stride_pdfs_3]; + const double f_8 = pdfs[index_NE][m[ctr_0 - 1]*_stride_pdfs_0 + m[ctr_1 - 1]*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 8*_stride_pdfs_3]; + const double f_9 = pdfs[index_SW][m[ctr_0 + 1]*_stride_pdfs_0 + m[ctr_1 + 1]*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 9*_stride_pdfs_3]; + const double f_10 = pdfs[index_SE][m[ctr_0 - 1]*_stride_pdfs_0 + m[ctr_1 + 1]*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 10*_stride_pdfs_3]; + const double f_11 = pdfs[index_TN][ctr_0*_stride_pdfs_0 + m[ctr_1 - 1]*_stride_pdfs_1 + m[ctr_2 - 1]*_stride_pdfs_2 + 11*_stride_pdfs_3]; + const double f_12 = pdfs[index_TS][ctr_0*_stride_pdfs_0 + m[ctr_1 + 1]*_stride_pdfs_1 + m[ctr_2 - 1]*_stride_pdfs_2 + 12*_stride_pdfs_3]; + const double f_13 = pdfs[index_TW][m[ctr_0 + 1]*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + m[ctr_2 - 1]*_stride_pdfs_2 + 13*_stride_pdfs_3]; + const double f_14 = pdfs[index_TE][m[ctr_0 - 1]*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + m[ctr_2 - 1]*_stride_pdfs_2 + 14*_stride_pdfs_3]; + const double f_15 = pdfs[index_BN][ctr_0*_stride_pdfs_0 + m[ctr_1 - 1]*_stride_pdfs_1 + m[ctr_2 + 1]*_stride_pdfs_2 + 15*_stride_pdfs_3]; + const double f_16 = pdfs[index_BS][ctr_0*_stride_pdfs_0 + m[ctr_1 + 1]*_stride_pdfs_1 + m[ctr_2 + 1]*_stride_pdfs_2 + 16*_stride_pdfs_3]; + const double f_17 = pdfs[index_BW][m[ctr_0 + 1]*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + m[ctr_2 + 1]*_stride_pdfs_2 + 17*_stride_pdfs_3]; + const double f_18 = pdfs[index_BE][m[ctr_0 - 1]*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + m[ctr_2 + 1]*_stride_pdfs_2 + 18*_stride_pdfs_3]; + const double f_19 = pdfs[index_TNE][m[ctr_0 - 1]*_stride_pdfs_0 + m[ctr_1 - 1]*_stride_pdfs_1 + m[ctr_2 - 1]*_stride_pdfs_2 + 19*_stride_pdfs_3]; + const double f_20 = pdfs[index_TNW][m[ctr_0 + 1]*_stride_pdfs_0 + m[ctr_1 - 1]*_stride_pdfs_1 + m[ctr_2 - 1]*_stride_pdfs_2 + 20*_stride_pdfs_3]; + const double f_21 = pdfs[index_TSE][m[ctr_0 - 1]*_stride_pdfs_0 + m[ctr_1 + 1]*_stride_pdfs_1 + m[ctr_2 - 1]*_stride_pdfs_2 + 21*_stride_pdfs_3]; + const double f_22 = pdfs[index_TSW][m[ctr_0 + 1]*_stride_pdfs_0 + m[ctr_1 + 1]*_stride_pdfs_1 + m[ctr_2 - 1]*_stride_pdfs_2 + 22*_stride_pdfs_3]; + const double f_23 = pdfs[index_BNE][m[ctr_0 - 1]*_stride_pdfs_0 + m[ctr_1 - 1]*_stride_pdfs_1 + m[ctr_2 + 1]*_stride_pdfs_2 + 23*_stride_pdfs_3]; + const double f_24 = pdfs[index_BNW][m[ctr_0 + 1]*_stride_pdfs_0 + m[ctr_1 - 1]*_stride_pdfs_1 + m[ctr_2 + 1]*_stride_pdfs_2 + 24*_stride_pdfs_3]; + const double f_25 = pdfs[index_BSE][m[ctr_0 - 1]*_stride_pdfs_0 + m[ctr_1 + 1]*_stride_pdfs_1 + m[ctr_2 + 1]*_stride_pdfs_2 + 25*_stride_pdfs_3]; + const double f_26 = pdfs[index_BSW][m[ctr_0 + 1]*_stride_pdfs_0 + m[ctr_1 + 1]*_stride_pdfs_1 + m[ctr_2 + 1]*_stride_pdfs_2 + 26*_stride_pdfs_3]; + + const double vel0Term = f_10 + f_14 + f_18 + f_19 + f_21 + f_23 + f_25 + f_4 + f_8; + const double vel1Term = f_1 + f_11 + f_15 + f_20 + f_24 + f_7; + const double vel2Term = f_12 + f_13 + f_22 + f_5; + const double delta_rho = f_0 + f_16 + f_17 + f_2 + f_26 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term; + const double rho = delta_rho + 1.0; + const double xi_1 = ((1.0) / (rho)); + const double u_0 = xi_1*(-f_13 - f_17 - f_20 - f_22 - f_24 - f_26 - f_3 - f_7 - f_9 + vel0Term); + const double u_1 = xi_1*(-f_10 - f_12 - f_16 + f_19 - f_2 - f_21 - f_22 + f_23 - f_25 - f_26 + f_8 - f_9 + vel1Term); + const double u_2 = xi_1*(f_11 + f_14 - f_15 - f_16 - f_17 - f_18 + f_19 + f_20 + f_21 - f_23 - f_24 - f_25 - f_26 - f_6 + vel2Term); + const double u0Mu1 = u_0 - u_1; + const double u0Pu1 = u_0 + u_1; + const double u1Pu2 = u_1 + u_2; + const double u1Mu2 = u_1 - u_2; + const double u0Mu2 = u_0 - u_2; + const double u0Pu2 = u_0 + u_2; + const double f_eq_common = delta_rho + rho*-1.5*(u_0*u_0) + rho*-1.5*(u_1*u_1) + rho*-1.5*(u_2*u_2); + + const double r_0 = f_0 + omega*(-f_0 + f_eq_common*0.29629629629629628); + const double r_1 = f_1 + omega*(f_1*-0.5 + f_2*-0.5 + f_eq_common*0.07407407407407407 + rho*0.33333333333333331*(u_1*u_1)) + rr_0*(f_1*-0.5 + f_2*0.5 + rho*u_1*0.22222222222222221); + const double r_2 = f_2 + omega*(f_1*-0.5 + f_2*-0.5 + f_eq_common*0.07407407407407407 + rho*0.33333333333333331*(u_1*u_1)) + rr_0*(f_1*0.5 + f_2*-0.5 + rho*u_1*-0.22222222222222221); + const double r_3 = f_3 + omega*(f_3*-0.5 + f_4*-0.5 + f_eq_common*0.07407407407407407 + rho*0.33333333333333331*(u_0*u_0)) + rr_0*(f_3*-0.5 + f_4*0.5 + rho*u_0*-0.22222222222222221); + const double r_4 = f_4 + omega*(f_3*-0.5 + f_4*-0.5 + f_eq_common*0.07407407407407407 + rho*0.33333333333333331*(u_0*u_0)) + rr_0*(f_3*0.5 + f_4*-0.5 + rho*u_0*0.22222222222222221); + const double r_5 = f_5 + omega*(f_5*-0.5 + f_6*-0.5 + f_eq_common*0.07407407407407407 + rho*0.33333333333333331*(u_2*u_2)) + rr_0*(f_5*-0.5 + f_6*0.5 + rho*u_2*0.22222222222222221); + const double r_6 = f_6 + omega*(f_5*-0.5 + f_6*-0.5 + f_eq_common*0.07407407407407407 + rho*0.33333333333333331*(u_2*u_2)) + rr_0*(f_5*0.5 + f_6*-0.5 + rho*u_2*-0.22222222222222221); + const double r_7 = f_7 + omega*(f_10*-0.5 + f_7*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u0Mu1*u0Mu1)) + rr_0*(f_10*0.5 + f_7*-0.5 + rho*u0Mu1*-0.055555555555555552); + const double r_8 = f_8 + omega*(f_8*-0.5 + f_9*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u0Pu1*u0Pu1)) + rr_0*(f_8*-0.5 + f_9*0.5 + rho*u0Pu1*0.055555555555555552); + const double r_9 = f_9 + omega*(f_8*-0.5 + f_9*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u0Pu1*u0Pu1)) + rr_0*(f_8*0.5 + f_9*-0.5 + rho*u0Pu1*-0.055555555555555552); + const double r_10 = f_10 + omega*(f_10*-0.5 + f_7*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u0Mu1*u0Mu1)) + rr_0*(f_10*-0.5 + f_7*0.5 + rho*u0Mu1*0.055555555555555552); + const double r_11 = f_11 + omega*(f_11*-0.5 + f_16*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u1Pu2*u1Pu2)) + rr_0*(f_11*-0.5 + f_16*0.5 + rho*u1Pu2*0.055555555555555552); + const double r_12 = f_12 + omega*(f_12*-0.5 + f_15*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u1Mu2*u1Mu2)) + rr_0*(f_12*-0.5 + f_15*0.5 + rho*u1Mu2*-0.055555555555555552); + const double r_13 = f_13 + omega*(f_13*-0.5 + f_18*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u0Mu2*u0Mu2)) + rr_0*(f_13*-0.5 + f_18*0.5 + rho*u0Mu2*-0.055555555555555552); + const double r_14 = f_14 + omega*(f_14*-0.5 + f_17*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u0Pu2*u0Pu2)) + rr_0*(f_14*-0.5 + f_17*0.5 + rho*u0Pu2*0.055555555555555552); + const double r_15 = f_15 + omega*(f_12*-0.5 + f_15*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u1Mu2*u1Mu2)) + rr_0*(f_12*0.5 + f_15*-0.5 + rho*u1Mu2*0.055555555555555552); + const double r_16 = f_16 + omega*(f_11*-0.5 + f_16*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u1Pu2*u1Pu2)) + rr_0*(f_11*0.5 + f_16*-0.5 + rho*u1Pu2*-0.055555555555555552); + const double r_17 = f_17 + omega*(f_14*-0.5 + f_17*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u0Pu2*u0Pu2)) + rr_0*(f_14*0.5 + f_17*-0.5 + rho*u0Pu2*-0.055555555555555552); + const double r_18 = f_18 + omega*(f_13*-0.5 + f_18*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u0Mu2*u0Mu2)) + rr_0*(f_13*0.5 + f_18*-0.5 + rho*u0Mu2*0.055555555555555552); + const double r_19 = f_19 + omega*(delta_rho*-0.013888888888888888 + f_19*-0.5 + f_26*-0.5 + f_eq_common*0.018518518518518517 + rho*(0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2))) + rr_0*(f_19*-0.5 + f_26*0.5 + rho*(u0Pu1*0.013888888888888888 + u_2*0.013888888888888888)); + const double r_20 = f_20 + omega*(delta_rho*-0.013888888888888888 + f_20*-0.5 + f_25*-0.5 + f_eq_common*0.018518518518518517 + rho*(0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2))) + rr_0*(f_20*-0.5 + f_25*0.5 + rho*(u0Mu1*-0.013888888888888888 + u_2*0.013888888888888888)); + const double r_21 = f_21 + omega*(delta_rho*-0.013888888888888888 + f_21*-0.5 + f_24*-0.5 + f_eq_common*0.018518518518518517 + rho*(0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2))) + rr_0*(f_21*-0.5 + f_24*0.5 + rho*(u0Mu1*0.013888888888888888 + u_2*0.013888888888888888)); + const double r_22 = f_22 + omega*(delta_rho*-0.013888888888888888 + f_22*-0.5 + f_23*-0.5 + f_eq_common*0.018518518518518517 + rho*(0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2))) + rr_0*(f_22*-0.5 + f_23*0.5 + rho*(u0Pu1*-0.013888888888888888 + u_2*0.013888888888888888)); + const double r_23 = f_23 + omega*(delta_rho*-0.013888888888888888 + f_22*-0.5 + f_23*-0.5 + f_eq_common*0.018518518518518517 + rho*(0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2))) + rr_0*(f_22*0.5 + f_23*-0.5 + rho*(u0Pu1*0.013888888888888888 + u_2*-0.013888888888888888)); + const double r_24 = f_24 + omega*(delta_rho*-0.013888888888888888 + f_21*-0.5 + f_24*-0.5 + f_eq_common*0.018518518518518517 + rho*(0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2))) + rr_0*(f_21*0.5 + f_24*-0.5 + rho*(u0Mu1*-0.013888888888888888 + u_2*-0.013888888888888888)); + const double r_25 = f_25 + omega*(delta_rho*-0.013888888888888888 + f_20*-0.5 + f_25*-0.5 + f_eq_common*0.018518518518518517 + rho*(0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2))) + rr_0*(f_20*0.5 + f_25*-0.5 + rho*(u0Mu1*0.013888888888888888 + u_2*-0.013888888888888888)); + const double r_26 = f_26 + omega*(delta_rho*-0.013888888888888888 + f_19*-0.5 + f_26*-0.5 + f_eq_common*0.018518518518518517 + rho*(0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2))) + rr_0*(f_19*0.5 + f_26*-0.5 + rho*(u0Pu1*-0.013888888888888888 + u_2*-0.013888888888888888)); + + pdfsTmp[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 0*_stride_pdfs_3] = r_0; + pdfsTmp[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 1*_stride_pdfs_3] = r_1; + pdfsTmp[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 2*_stride_pdfs_3] = r_2; + pdfsTmp[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 3*_stride_pdfs_3] = r_3; + pdfsTmp[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 4*_stride_pdfs_3] = r_4; + pdfsTmp[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 5*_stride_pdfs_3] = r_5; + pdfsTmp[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 6*_stride_pdfs_3] = r_6; + pdfsTmp[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 7*_stride_pdfs_3] = r_7; + pdfsTmp[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 8*_stride_pdfs_3] = r_8; + pdfsTmp[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 9*_stride_pdfs_3] = r_9; + pdfsTmp[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 10*_stride_pdfs_3] = r_10; + pdfsTmp[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 11*_stride_pdfs_3] = r_11; + pdfsTmp[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 12*_stride_pdfs_3] = r_12; + pdfsTmp[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 13*_stride_pdfs_3] = r_13; + pdfsTmp[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 14*_stride_pdfs_3] = r_14; + pdfsTmp[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 15*_stride_pdfs_3] = r_15; + pdfsTmp[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 16*_stride_pdfs_3] = r_16; + pdfsTmp[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 17*_stride_pdfs_3] = r_17; + pdfsTmp[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 18*_stride_pdfs_3] = r_18; + pdfsTmp[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 19*_stride_pdfs_3] = r_19; + pdfsTmp[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 20*_stride_pdfs_3] = r_20; + pdfsTmp[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 21*_stride_pdfs_3] = r_21; + pdfsTmp[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 22*_stride_pdfs_3] = r_22; + pdfsTmp[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 23*_stride_pdfs_3] = r_23; + pdfsTmp[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 24*_stride_pdfs_3] = r_24; + pdfsTmp[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 25*_stride_pdfs_3] = r_25; + pdfsTmp[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 26*_stride_pdfs_3] = r_26; + } + } + } + + void streamCollideOdd() + { + double omega = omegaVector[0]; + const double xi_0 = ((1.0) / (omega*-0.25 + 2.0)); + const double rr_0 = xi_0*(omega*-2.0 + 4.0); + + // std::map<int, int> m{{-1, 0}, {0, 0}, {1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 0}}; + std::map<int, int> m{{-1, 0}, {0, 0}, {1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 0}}; + + for(uint_t i = 0; i < _nBlocks; ++i) + { + for (auto it: _interval) + { + const cell_idx_t ctr_2 = it.z(); + const cell_idx_t ctr_1 = it.y(); + const cell_idx_t ctr_0 = it.x(); + + const uint_t index_C = uint_c(27) * i + uint_c( (0 + 1) * 9 + (0 + 1) * 3 + 0 + 1 ); + const uint_t index_N = uint_c(27) * i + uint_c( (0 + 1) * 9 + ((ctr_1 -1 < 0 ? -1 : 0) + 1) * 3 + 0 + 1 ); + const uint_t index_S = uint_c(27) * i + uint_c( (0 + 1) * 9 + ((ctr_1 +1 > 7 ? 1 : 0) + 1) * 3 + 0 + 1 ); + const uint_t index_W = uint_c(27) * i + uint_c( (0 + 1) * 9 + (0 + 1) * 3 + (ctr_0 +1 > 7 ? 1 : 0) + 1 ); + const uint_t index_E = uint_c(27) * i + uint_c( (0 + 1) * 9 + (0 + 1) * 3 + (ctr_0 -1 < 0 ? -1 : 0) + 1 ); + const uint_t index_T = uint_c(27) * i + uint_c( ((ctr_2 -1 < 0 ? -1 : 0) + 1) * 9 + (0 + 1) * 3 + 0 + 1 ); + const uint_t index_B = uint_c(27) * i + uint_c( ((ctr_2 +1 > 7 ? 1 : 0) + 1) * 9 + (0 + 1) * 3 + 0 + 1 ); + const uint_t index_NW = uint_c(27) * i + uint_c( (0 + 1) * 9 + ((ctr_1 -1 < 0 ? -1 : 0) + 1) * 3 + (ctr_0 +1 > 7 ? 1 : 0) + 1 ); + const uint_t index_NE = uint_c(27) * i + uint_c( (0 + 1) * 9 + ((ctr_1 -1 < 0 ? -1 : 0) + 1) * 3 + (ctr_0 -1 < 0 ? -1 : 0) + 1 ); + const uint_t index_SW = uint_c(27) * i + uint_c( (0 + 1) * 9 + ((ctr_1 +1 > 7 ? 1 : 0) + 1) * 3 + (ctr_0 +1 > 7 ? 1 : 0) + 1 ); + const uint_t index_SE = uint_c(27) * i + uint_c( (0 + 1) * 9 + ((ctr_1 +1 > 7 ? 1 : 0) + 1) * 3 + (ctr_0 -1 < 0 ? -1 : 0) + 1 ); + const uint_t index_TN = uint_c(27) * i + uint_c( ((ctr_2 -1 < 0 ? -1 : 0) + 1) * 9 + ((ctr_1 -1 < 0 ? -1 : 0) + 1) * 3 + 0 + 1 ); + const uint_t index_TS = uint_c(27) * i + uint_c( ((ctr_2 -1 < 0 ? -1 : 0) + 1) * 9 + ((ctr_1 +1 > 7 ? 1 : 0) + 1) * 3 + 0 + 1 ); + const uint_t index_TW = uint_c(27) * i + uint_c( ((ctr_2 -1 < 0 ? -1 : 0) + 1) * 9 + (0 + 1) * 3 + (ctr_0 +1 > 7 ? 1 : 0) + 1 ); + const uint_t index_TE = uint_c(27) * i + uint_c( ((ctr_2 -1 < 0 ? -1 : 0) + 1) * 9 + (0 + 1) * 3 + (ctr_0 -1 < 0 ? -1 : 0) + 1 ); + const uint_t index_BN = uint_c(27) * i + uint_c( ((ctr_2 +1 > 7 ? 1 : 0) + 1) * 9 + ((ctr_1 -1 < 0 ? -1 : 0) + 1) * 3 + 0 + 1 ); + const uint_t index_BS = uint_c(27) * i + uint_c( ((ctr_2 +1 > 7 ? 1 : 0) + 1) * 9 + ((ctr_1 +1 > 7 ? 1 : 0) + 1) * 3 + 0 + 1 ); + const uint_t index_BW = uint_c(27) * i + uint_c( ((ctr_2 +1 > 7 ? 1 : 0) + 1) * 9 + (0 + 1) * 3 + (ctr_0 +1 > 7 ? 1 : 0) + 1 ); + const uint_t index_BE = uint_c(27) * i + uint_c( ((ctr_2 +1 > 7 ? 1 : 0) + 1) * 9 + (0 + 1) * 3 + (ctr_0 -1 < 0 ? -1 : 0) + 1 ); + const uint_t index_TNE = uint_c(27) * i + uint_c( ((ctr_2 -1 < 0 ? -1 : 0) + 1) * 9 + ((ctr_1 -1 < 0 ? -1 : 0) + 1) * 3 + (ctr_0 -1 < 0 ? -1 : 0) + 1 ); + const uint_t index_TNW = uint_c(27) * i + uint_c( ((ctr_2 -1 < 0 ? -1 : 0) + 1) * 9 + ((ctr_1 -1 < 0 ? -1 : 0) + 1) * 3 + (ctr_0 +1 > 7 ? 1 : 0) + 1 ); + const uint_t index_TSE = uint_c(27) * i + uint_c( ((ctr_2 -1 < 0 ? -1 : 0) + 1) * 9 + ((ctr_1 +1 > 7 ? 1 : 0) + 1) * 3 + (ctr_0 -1 < 0 ? -1 : 0) + 1 ); + const uint_t index_TSW = uint_c(27) * i + uint_c( ((ctr_2 -1 < 0 ? -1 : 0) + 1) * 9 + ((ctr_1 +1 > 7 ? 1 : 0) + 1) * 3 + (ctr_0 +1 > 7 ? 1 : 0) + 1 ); + const uint_t index_BNE = uint_c(27) * i + uint_c( ((ctr_2 +1 > 7 ? 1 : 0) + 1) * 9 + ((ctr_1 -1 < 0 ? -1 : 0) + 1) * 3 + (ctr_0 -1 < 0 ? -1 : 0) + 1 ); + const uint_t index_BNW = uint_c(27) * i + uint_c( ((ctr_2 +1 > 7 ? 1 : 0) + 1) * 9 + ((ctr_1 -1 < 0 ? -1 : 0) + 1) * 3 + (ctr_0 +1 > 7 ? 1 : 0) + 1 ); + const uint_t index_BSE = uint_c(27) * i + uint_c( ((ctr_2 +1 > 7 ? 1 : 0) + 1) * 9 + ((ctr_1 +1 > 7 ? 1 : 0) + 1) * 3 + (ctr_0 -1 < 0 ? -1 : 0) + 1 ); + const uint_t index_BSW = uint_c(27) * i + uint_c( ((ctr_2 +1 > 7 ? 1 : 0) + 1) * 9 + ((ctr_1 +1 > 7 ? 1 : 0) + 1) * 3 + (ctr_0 +1 > 7 ? 1 : 0) + 1 ); + + + const double f_0 = pdfsTmp[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 0*_stride_pdfs_3]; + const double f_1 = pdfsTmp[index_N][ctr_0*_stride_pdfs_0 + m[ctr_1 - 1]*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 1*_stride_pdfs_3]; + const double f_2 = pdfsTmp[index_S][ctr_0*_stride_pdfs_0 + m[ctr_1 + 1]*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 2*_stride_pdfs_3]; + const double f_3 = pdfsTmp[index_W][m[ctr_0 + 1]*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 3*_stride_pdfs_3]; + const double f_4 = pdfsTmp[index_E][m[ctr_0 - 1]*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 4*_stride_pdfs_3]; + const double f_5 = pdfsTmp[index_T][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + m[ctr_2 - 1]*_stride_pdfs_2 + 5*_stride_pdfs_3]; + const double f_6 = pdfsTmp[index_B][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + m[ctr_2 + 1]*_stride_pdfs_2 + 6*_stride_pdfs_3]; + const double f_7 = pdfsTmp[index_NW][m[ctr_0 + 1]*_stride_pdfs_0 + m[ctr_1 - 1]*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 7*_stride_pdfs_3]; + const double f_8 = pdfsTmp[index_NE][m[ctr_0 - 1]*_stride_pdfs_0 + m[ctr_1 - 1]*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 8*_stride_pdfs_3]; + const double f_9 = pdfsTmp[index_SW][m[ctr_0 + 1]*_stride_pdfs_0 + m[ctr_1 + 1]*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 9*_stride_pdfs_3]; + const double f_10 = pdfsTmp[index_SE][m[ctr_0 - 1]*_stride_pdfs_0 + m[ctr_1 + 1]*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 10*_stride_pdfs_3]; + const double f_11 = pdfsTmp[index_TN][ctr_0*_stride_pdfs_0 + m[ctr_1 - 1]*_stride_pdfs_1 + m[ctr_2 - 1]*_stride_pdfs_2 + 11*_stride_pdfs_3]; + const double f_12 = pdfsTmp[index_TS][ctr_0*_stride_pdfs_0 + m[ctr_1 + 1]*_stride_pdfs_1 + m[ctr_2 - 1]*_stride_pdfs_2 + 12*_stride_pdfs_3]; + const double f_13 = pdfsTmp[index_TW][m[ctr_0 + 1]*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + m[ctr_2 - 1]*_stride_pdfs_2 + 13*_stride_pdfs_3]; + const double f_14 = pdfsTmp[index_TE][m[ctr_0 - 1]*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + m[ctr_2 - 1]*_stride_pdfs_2 + 14*_stride_pdfs_3]; + const double f_15 = pdfsTmp[index_BN][ctr_0*_stride_pdfs_0 + m[ctr_1 - 1]*_stride_pdfs_1 + m[ctr_2 + 1]*_stride_pdfs_2 + 15*_stride_pdfs_3]; + const double f_16 = pdfsTmp[index_BS][ctr_0*_stride_pdfs_0 + m[ctr_1 + 1]*_stride_pdfs_1 + m[ctr_2 + 1]*_stride_pdfs_2 + 16*_stride_pdfs_3]; + const double f_17 = pdfsTmp[index_BW][m[ctr_0 + 1]*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + m[ctr_2 + 1]*_stride_pdfs_2 + 17*_stride_pdfs_3]; + const double f_18 = pdfsTmp[index_BE][m[ctr_0 - 1]*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + m[ctr_2 + 1]*_stride_pdfs_2 + 18*_stride_pdfs_3]; + const double f_19 = pdfsTmp[index_TNE][m[ctr_0 - 1]*_stride_pdfs_0 + m[ctr_1 - 1]*_stride_pdfs_1 + m[ctr_2 - 1]*_stride_pdfs_2 + 19*_stride_pdfs_3]; + const double f_20 = pdfsTmp[index_TNW][m[ctr_0 + 1]*_stride_pdfs_0 + m[ctr_1 - 1]*_stride_pdfs_1 + m[ctr_2 - 1]*_stride_pdfs_2 + 20*_stride_pdfs_3]; + const double f_21 = pdfsTmp[index_TSE][m[ctr_0 - 1]*_stride_pdfs_0 + m[ctr_1 + 1]*_stride_pdfs_1 + m[ctr_2 - 1]*_stride_pdfs_2 + 21*_stride_pdfs_3]; + const double f_22 = pdfsTmp[index_TSW][m[ctr_0 + 1]*_stride_pdfs_0 + m[ctr_1 + 1]*_stride_pdfs_1 + m[ctr_2 - 1]*_stride_pdfs_2 + 22*_stride_pdfs_3]; + const double f_23 = pdfsTmp[index_BNE][m[ctr_0 - 1]*_stride_pdfs_0 + m[ctr_1 - 1]*_stride_pdfs_1 + m[ctr_2 + 1]*_stride_pdfs_2 + 23*_stride_pdfs_3]; + const double f_24 = pdfsTmp[index_BNW][m[ctr_0 + 1]*_stride_pdfs_0 + m[ctr_1 - 1]*_stride_pdfs_1 + m[ctr_2 + 1]*_stride_pdfs_2 + 24*_stride_pdfs_3]; + const double f_25 = pdfsTmp[index_BSE][m[ctr_0 - 1]*_stride_pdfs_0 + m[ctr_1 + 1]*_stride_pdfs_1 + m[ctr_2 + 1]*_stride_pdfs_2 + 25*_stride_pdfs_3]; + const double f_26 = pdfsTmp[index_BSW][m[ctr_0 + 1]*_stride_pdfs_0 + m[ctr_1 + 1]*_stride_pdfs_1 + m[ctr_2 + 1]*_stride_pdfs_2 + 26*_stride_pdfs_3]; + + const double vel0Term = f_10 + f_14 + f_18 + f_19 + f_21 + f_23 + f_25 + f_4 + f_8; + const double vel1Term = f_1 + f_11 + f_15 + f_20 + f_24 + f_7; + const double vel2Term = f_12 + f_13 + f_22 + f_5; + const double delta_rho = f_0 + f_16 + f_17 + f_2 + f_26 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term; + const double rho = delta_rho + 1.0; + const double xi_1 = ((1.0) / (rho)); + const double u_0 = xi_1*(-f_13 - f_17 - f_20 - f_22 - f_24 - f_26 - f_3 - f_7 - f_9 + vel0Term); + const double u_1 = xi_1*(-f_10 - f_12 - f_16 + f_19 - f_2 - f_21 - f_22 + f_23 - f_25 - f_26 + f_8 - f_9 + vel1Term); + const double u_2 = xi_1*(f_11 + f_14 - f_15 - f_16 - f_17 - f_18 + f_19 + f_20 + f_21 - f_23 - f_24 - f_25 - f_26 - f_6 + vel2Term); + const double u0Mu1 = u_0 - u_1; + const double u0Pu1 = u_0 + u_1; + const double u1Pu2 = u_1 + u_2; + const double u1Mu2 = u_1 - u_2; + const double u0Mu2 = u_0 - u_2; + const double u0Pu2 = u_0 + u_2; + const double f_eq_common = delta_rho + rho*-1.5*(u_0*u_0) + rho*-1.5*(u_1*u_1) + rho*-1.5*(u_2*u_2); + + const double r_0 = f_0 + omega*(-f_0 + f_eq_common*0.29629629629629628); + const double r_1 = f_1 + omega*(f_1*-0.5 + f_2*-0.5 + f_eq_common*0.07407407407407407 + rho*0.33333333333333331*(u_1*u_1)) + rr_0*(f_1*-0.5 + f_2*0.5 + rho*u_1*0.22222222222222221); + const double r_2 = f_2 + omega*(f_1*-0.5 + f_2*-0.5 + f_eq_common*0.07407407407407407 + rho*0.33333333333333331*(u_1*u_1)) + rr_0*(f_1*0.5 + f_2*-0.5 + rho*u_1*-0.22222222222222221); + const double r_3 = f_3 + omega*(f_3*-0.5 + f_4*-0.5 + f_eq_common*0.07407407407407407 + rho*0.33333333333333331*(u_0*u_0)) + rr_0*(f_3*-0.5 + f_4*0.5 + rho*u_0*-0.22222222222222221); + const double r_4 = f_4 + omega*(f_3*-0.5 + f_4*-0.5 + f_eq_common*0.07407407407407407 + rho*0.33333333333333331*(u_0*u_0)) + rr_0*(f_3*0.5 + f_4*-0.5 + rho*u_0*0.22222222222222221); + const double r_5 = f_5 + omega*(f_5*-0.5 + f_6*-0.5 + f_eq_common*0.07407407407407407 + rho*0.33333333333333331*(u_2*u_2)) + rr_0*(f_5*-0.5 + f_6*0.5 + rho*u_2*0.22222222222222221); + const double r_6 = f_6 + omega*(f_5*-0.5 + f_6*-0.5 + f_eq_common*0.07407407407407407 + rho*0.33333333333333331*(u_2*u_2)) + rr_0*(f_5*0.5 + f_6*-0.5 + rho*u_2*-0.22222222222222221); + const double r_7 = f_7 + omega*(f_10*-0.5 + f_7*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u0Mu1*u0Mu1)) + rr_0*(f_10*0.5 + f_7*-0.5 + rho*u0Mu1*-0.055555555555555552); + const double r_8 = f_8 + omega*(f_8*-0.5 + f_9*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u0Pu1*u0Pu1)) + rr_0*(f_8*-0.5 + f_9*0.5 + rho*u0Pu1*0.055555555555555552); + const double r_9 = f_9 + omega*(f_8*-0.5 + f_9*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u0Pu1*u0Pu1)) + rr_0*(f_8*0.5 + f_9*-0.5 + rho*u0Pu1*-0.055555555555555552); + const double r_10 = f_10 + omega*(f_10*-0.5 + f_7*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u0Mu1*u0Mu1)) + rr_0*(f_10*-0.5 + f_7*0.5 + rho*u0Mu1*0.055555555555555552); + const double r_11 = f_11 + omega*(f_11*-0.5 + f_16*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u1Pu2*u1Pu2)) + rr_0*(f_11*-0.5 + f_16*0.5 + rho*u1Pu2*0.055555555555555552); + const double r_12 = f_12 + omega*(f_12*-0.5 + f_15*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u1Mu2*u1Mu2)) + rr_0*(f_12*-0.5 + f_15*0.5 + rho*u1Mu2*-0.055555555555555552); + const double r_13 = f_13 + omega*(f_13*-0.5 + f_18*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u0Mu2*u0Mu2)) + rr_0*(f_13*-0.5 + f_18*0.5 + rho*u0Mu2*-0.055555555555555552); + const double r_14 = f_14 + omega*(f_14*-0.5 + f_17*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u0Pu2*u0Pu2)) + rr_0*(f_14*-0.5 + f_17*0.5 + rho*u0Pu2*0.055555555555555552); + const double r_15 = f_15 + omega*(f_12*-0.5 + f_15*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u1Mu2*u1Mu2)) + rr_0*(f_12*0.5 + f_15*-0.5 + rho*u1Mu2*0.055555555555555552); + const double r_16 = f_16 + omega*(f_11*-0.5 + f_16*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u1Pu2*u1Pu2)) + rr_0*(f_11*0.5 + f_16*-0.5 + rho*u1Pu2*-0.055555555555555552); + const double r_17 = f_17 + omega*(f_14*-0.5 + f_17*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u0Pu2*u0Pu2)) + rr_0*(f_14*0.5 + f_17*-0.5 + rho*u0Pu2*-0.055555555555555552); + const double r_18 = f_18 + omega*(f_13*-0.5 + f_18*-0.5 + f_eq_common*0.018518518518518517 + rho*0.083333333333333329*(u0Mu2*u0Mu2)) + rr_0*(f_13*0.5 + f_18*-0.5 + rho*u0Mu2*0.055555555555555552); + const double r_19 = f_19 + omega*(delta_rho*-0.013888888888888888 + f_19*-0.5 + f_26*-0.5 + f_eq_common*0.018518518518518517 + rho*(0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2))) + rr_0*(f_19*-0.5 + f_26*0.5 + rho*(u0Pu1*0.013888888888888888 + u_2*0.013888888888888888)); + const double r_20 = f_20 + omega*(delta_rho*-0.013888888888888888 + f_20*-0.5 + f_25*-0.5 + f_eq_common*0.018518518518518517 + rho*(0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2))) + rr_0*(f_20*-0.5 + f_25*0.5 + rho*(u0Mu1*-0.013888888888888888 + u_2*0.013888888888888888)); + const double r_21 = f_21 + omega*(delta_rho*-0.013888888888888888 + f_21*-0.5 + f_24*-0.5 + f_eq_common*0.018518518518518517 + rho*(0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2))) + rr_0*(f_21*-0.5 + f_24*0.5 + rho*(u0Mu1*0.013888888888888888 + u_2*0.013888888888888888)); + const double r_22 = f_22 + omega*(delta_rho*-0.013888888888888888 + f_22*-0.5 + f_23*-0.5 + f_eq_common*0.018518518518518517 + rho*(0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2))) + rr_0*(f_22*-0.5 + f_23*0.5 + rho*(u0Pu1*-0.013888888888888888 + u_2*0.013888888888888888)); + const double r_23 = f_23 + omega*(delta_rho*-0.013888888888888888 + f_22*-0.5 + f_23*-0.5 + f_eq_common*0.018518518518518517 + rho*(0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2))) + rr_0*(f_22*0.5 + f_23*-0.5 + rho*(u0Pu1*0.013888888888888888 + u_2*-0.013888888888888888)); + const double r_24 = f_24 + omega*(delta_rho*-0.013888888888888888 + f_21*-0.5 + f_24*-0.5 + f_eq_common*0.018518518518518517 + rho*(0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2))) + rr_0*(f_21*0.5 + f_24*-0.5 + rho*(u0Mu1*-0.013888888888888888 + u_2*-0.013888888888888888)); + const double r_25 = f_25 + omega*(delta_rho*-0.013888888888888888 + f_20*-0.5 + f_25*-0.5 + f_eq_common*0.018518518518518517 + rho*(0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2))) + rr_0*(f_20*0.5 + f_25*-0.5 + rho*(u0Mu1*0.013888888888888888 + u_2*-0.013888888888888888)); + const double r_26 = f_26 + omega*(delta_rho*-0.013888888888888888 + f_19*-0.5 + f_26*-0.5 + f_eq_common*0.018518518518518517 + rho*(0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2))) + rr_0*(f_19*0.5 + f_26*-0.5 + rho*(u0Pu1*-0.013888888888888888 + u_2*-0.013888888888888888)); + + pdfs[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 0*_stride_pdfs_3] = r_0; + pdfs[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 1*_stride_pdfs_3] = r_1; + pdfs[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 2*_stride_pdfs_3] = r_2; + pdfs[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 3*_stride_pdfs_3] = r_3; + pdfs[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 4*_stride_pdfs_3] = r_4; + pdfs[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 5*_stride_pdfs_3] = r_5; + pdfs[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 6*_stride_pdfs_3] = r_6; + pdfs[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 7*_stride_pdfs_3] = r_7; + pdfs[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 8*_stride_pdfs_3] = r_8; + pdfs[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 9*_stride_pdfs_3] = r_9; + pdfs[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 10*_stride_pdfs_3] = r_10; + pdfs[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 11*_stride_pdfs_3] = r_11; + pdfs[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 12*_stride_pdfs_3] = r_12; + pdfs[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 13*_stride_pdfs_3] = r_13; + pdfs[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 14*_stride_pdfs_3] = r_14; + pdfs[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 15*_stride_pdfs_3] = r_15; + pdfs[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 16*_stride_pdfs_3] = r_16; + pdfs[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 17*_stride_pdfs_3] = r_17; + pdfs[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 18*_stride_pdfs_3] = r_18; + pdfs[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 19*_stride_pdfs_3] = r_19; + pdfs[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 20*_stride_pdfs_3] = r_20; + pdfs[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 21*_stride_pdfs_3] = r_21; + pdfs[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 22*_stride_pdfs_3] = r_22; + pdfs[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 23*_stride_pdfs_3] = r_23; + pdfs[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 24*_stride_pdfs_3] = r_24; + pdfs[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 25*_stride_pdfs_3] = r_25; + pdfs[index_C][ctr_0*_stride_pdfs_0 + ctr_1*_stride_pdfs_1 + ctr_2*_stride_pdfs_2 + 26*_stride_pdfs_3] = r_26; + } + } + } + private: + IDs ids_; + std::vector<Block*> blockVector; + std::vector<real_t> omegaVector; + std::vector<real_t *> pdfs; + std::vector<real_t *> pdfsTmp; + CellInterval _interval; + uint_t _nBlocks; + int64_t _stride_pdfs_0; + int64_t _stride_pdfs_1; + int64_t _stride_pdfs_2; + int64_t _stride_pdfs_3; +}; + +class Timestep +{ + public: + Timestep(std::shared_ptr< StructuredBlockForest >& blocks, const IDs& ids, + StreamCollide & streamCollide, std::shared_ptr< PackInfo_T >& packInfo, UniformBufferedScheme<CommunicationStencil_T>& communication) + : blocks_(blocks), ids_(ids), streamCollide_(streamCollide), packInfo_(packInfo), communication_(communication) + { + + } + + void commLocal(const uint_t level) + { + for( auto it = blocks_->begin(); it != blocks_->end(); ++it ) + { + if(blocks_->getLevel(*it.get()) != level) + continue; + + auto* sender = dynamic_cast< Block* >(it.get()); + for( auto dir = Stencil_T::beginNoCenter(); dir != Stencil_T::end(); ++dir ) + { + const auto neighborIdx = blockforest::getBlockNeighborhoodSectionIndex(*dir); + if (!(sender->neighborhoodSectionHasEquallySizedBlock(neighborIdx))) + continue; + + const BlockID & receiverId = sender->getNeighborId( neighborIdx, uint_t(0) ); + auto receiver = dynamic_cast< Block * >( blocks_->getBlock(receiverId) ); + + const PdfField_T * srcField = sender->getData< PdfField_T >( ids_.pdfField ); + PdfField_T * dstField = receiver->getData< PdfField_T >( ids_.pdfField ); + + CellInterval srcRegion; + CellInterval dstRegion; + cell_idx_t gls = 1; + srcField->getSliceBeforeGhostLayer(*dir, srcRegion, gls, false); + dstField->getGhostRegion(stencil::inverseDir[*dir], dstRegion, gls, false); + WALBERLA_ASSERT_EQUAL(srcRegion.xSize(), dstRegion.xSize()) + WALBERLA_ASSERT_EQUAL(srcRegion.ySize(), dstRegion.ySize()) + WALBERLA_ASSERT_EQUAL(srcRegion.zSize(), dstRegion.zSize()) + + //WALBERLA_LOG_DEVEL_VAR(srcRegion) + //WALBERLA_LOG_DEVEL_VAR(dstRegion) + + auto srcIter = srcRegion.begin(); + auto dstIter = dstRegion.begin(); + + while (srcIter != srcRegion.end()) + { + for( uint_t f = 0; f < Stencil_T::d_per_d_length[*dir]; ++f ) + { + dstField->get(*dstIter, Stencil_T::idx[ Stencil_T::d_per_d[*dir][f] ]) = srcField->get(*srcIter, Stencil_T::idx[ Stencil_T::d_per_d[*dir][f] ]); + } + ++srcIter; + ++dstIter; + } + WALBERLA_ASSERT( srcIter == srcRegion.end() ) + WALBERLA_ASSERT( dstIter == dstRegion.end() ) + } + } + } + + void swap() + { + for( auto it = blocks_->begin(); it != blocks_->end(); ++it ) + { + PdfField_T * srcField = it->getData< PdfField_T >( ids_.pdfField ); + PdfField_T * dstField = it->getData< PdfField_T >( ids_.pdfFieldTmp ); + srcField->swapDataPointers(dstField); + } + } + + void timestep() + { +// for( auto it = blocks_->begin(); it != blocks_->end(); ++it ) +// { +// auto block = dynamic_cast< Block* >(it.get()); +// commLocal(0); +// streamCollide_.streamCollide(block); +// // streamCollide_(block); +// } +// commLocal(0); + if(((timestepCounter_) &1) ^ 1) + { + streamCollide_.streamCollideEven(); + swap(); + } + else + { + streamCollide_.streamCollideOdd(); + swap(); + } + timestepCounter_ = (timestepCounter_ + 1) & 1; + } + + void operator()(){ timestep(); }; + + private: + std::shared_ptr< StructuredBlockForest > blocks_; + IDs ids_; + StreamCollide & streamCollide_; + std::shared_ptr< PackInfo_T > packInfo_; + UniformBufferedScheme<CommunicationStencil_T> communication_; + uint8_t timestepCounter_{0}; +}; + +int main(int argc, char** argv) +{ + mpi::Environment env(argc, argv); + shared_ptr< Config > config = make_shared< Config >(); + config->readParameterFile(argv[1]); + logging::configureLogging(config); + + // read parameters + auto parameters = config->getOneBlock("Parameters"); + auto loggingParameters = config->getOneBlock("Logging"); + auto domainParameters = config->getOneBlock("DomainSetup"); + // auto EvaluationParameters = config->getOneBlock("Evaluation"); + + const real_t omega = parameters.getParameter< real_t >("omega"); + const real_t maxLatticeVelocity = parameters.getParameter< real_t >("maxLatticeVelocity"); + const uint_t timesteps = parameters.getParameter< uint_t >("timesteps") + uint_c(1); + + Setup setup; + setup.blocks = domainParameters.getParameter< Vector3< uint_t > >("blocks"); + setup.cellsPerBlock = domainParameters.getParameter< Vector3< uint_t > >("cellsPerBlock"); + setup.cells = Vector3< uint_t >(setup.blocks[0] * setup.cellsPerBlock[0], setup.blocks[1] * setup.cellsPerBlock[1], + setup.blocks[2] * setup.cellsPerBlock[2]); + setup.periodic = domainParameters.getParameter< Vector3< bool > >("periodic"); + setup.numGhostLayers = uint_c(1); + + const uint_t valuesPerCell = (uint_c(2) * Stencil_T::Q + VectorField_T ::F_SIZE + uint_c(2) * ScalarField_T::F_SIZE); + const uint_t sizePerValue = sizeof(PdfField_T::value_type); + const memory_t memoryPerCell = memory_t(valuesPerCell * sizePerValue + uint_c(1)); + + bool writeSetupForestAndReturn = loggingParameters.getParameter< bool >("writeSetupForestAndReturn", false); + if (uint_c(MPIManager::instance()->numProcesses()) > 1) writeSetupForestAndReturn = false; + + if (writeSetupForestAndReturn) + { + std::string sbffile = "TaylorGreenVortex.bfs"; + + std::ostringstream infoString; + infoString << "You have selected the option of just creating the block structure (= domain decomposition) and " + "saving the result to file\n" + "by specifying the output file name \'" + << sbffile << "\' AND also specifying \'saveToFile\'.\n"; + + if (MPIManager::instance()->numProcesses() > 1) + WALBERLA_ABORT(infoString.str() << "In this mode you need to start " << argv[0] << " with just one process!") + + WALBERLA_LOG_INFO_ON_ROOT(infoString.str() << "Creating the block structure ...") + + const uint_t numberProcesses = domainParameters.getParameter< uint_t >("numberProcesses"); + + shared_ptr< SetupBlockForest > sforest = createSetupBlockForest(setup, numberProcesses, memoryPerCell); + sforest->writeVTKOutput("domain_decomposition"); + sforest->saveToFile(sbffile.c_str()); + + logging::Logging::printFooterOnStream(); + return EXIT_SUCCESS; + } + + auto blocks = createStructuredBlockForest(setup, memoryPerCell); + + IDs ids; + const StorageSpecification_T StorageSpec = StorageSpecification_T(); + ids.pdfField = lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, setup.numGhostLayers, field::fzyx); + ids.pdfFieldTmp = lbm_generated::addPdfFieldToStorage(blocks, "pdfs tmp", StorageSpec, setup.numGhostLayers, field::fzyx); + ids.velocityField = field::addToStorage< VectorField_T >(blocks, "vel", real_c(0.0), field::fzyx, setup.numGhostLayers); + ids.densityField = field::addToStorage< ScalarField_T >(blocks, "density", real_c(1.0), field::fzyx, setup.numGhostLayers); + ids.flagField = field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field", uint_c(3)); + + const cell_idx_t domainHalf = cell_idx_c(blocks->getDomain().yMax() / real_c(2.0)); + for (auto& block : *blocks) + { + auto velocityField = block.getData<VectorField_T>(ids.velocityField); + for (cell_idx_t ctr_2 = 0; ctr_2 < velocityField->zSize(); ++ctr_2) + { + for(cell_idx_t ctr_1 = 0; ctr_1 < velocityField->ySize(); ++ctr_1) + { + for (cell_idx_t ctr_0 = 0; ctr_0 < velocityField->xSize(); ++ctr_0) + { + Cell domainCell(ctr_0, ctr_1, ctr_2); + blocks->transformBlockLocalToGlobalCell(domainCell, block); + if((domainCell.y() + real_c(0.5)) > domainHalf) + { + velocityField->get(ctr_0, ctr_1, ctr_2, 0) = maxLatticeVelocity; + } + else + { + velocityField->get(ctr_0, ctr_1, ctr_2, 0) = -maxLatticeVelocity; + } + } + } + } + } + + SweepCollection_T sweepCollection(blocks, ids.pdfField, ids.densityField, ids.velocityField, omega); + for (auto& block : *blocks) + { + sweepCollection.initialise(&block, cell_idx_c(setup.numGhostLayers - uint_c(1))); + } + + for (auto& block : *blocks) + { + if(blocks->getLevel(block) == 0) + { + sweepCollection.streamCollide(&block); + } + } + + const FlagUID fluidFlagUID("Fluid"); + auto boundariesConfig = config->getBlock("Boundaries"); + geometry::initBoundaryHandling< FlagField_T >(*blocks, ids.flagField, boundariesConfig); + geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, ids.flagField, fluidFlagUID, cell_idx_c(0)); + + // BoundaryCollection_T boundaryCollection(blocks, ids.flagField, ids.pdfField, fluidFlagUID, maxLatticeVelocity); + + WALBERLA_LOG_INFO_ON_ROOT("Setting up communication...") + + std::shared_ptr< PackInfo_T > packInfo = std::make_shared<lbm_generated::UniformGeneratedPdfPackInfo< PdfField_T >>(ids.pdfField); + UniformBufferedScheme< Stencil_T > communication(blocks); + communication.addPackInfo(packInfo); + + SweepTimeloop timeloop(blocks->getBlockStorage(), timesteps); + + auto VTKWriter = config->getOneBlock("VTKWriter"); + const uint_t vtkWriteFrequency = VTKWriter.getParameter< uint_t >("vtkWriteFrequency"); + const bool writeVelocity = VTKWriter.getParameter< bool >("velocity"); + const bool writeDensity = VTKWriter.getParameter< bool >("density"); + // const bool writeAverageFields = VTKWriter.getParameter< bool >("averageFields", false); + const bool writeFlag = VTKWriter.getParameter< bool >("flag"); + // const bool writeOnlySlice = VTKWriter.getParameter< bool >("writeOnlySlice", true); + const bool amrFileFormat = VTKWriter.getParameter< bool >("amrFileFormat", false); + const bool oneFilePerProcess = VTKWriter.getParameter< bool >("oneFilePerProcess", false); + + if (vtkWriteFrequency > 0) + { + auto vtkOutput = + vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_TaylorGreenVortex", "simulation_step", + false, true, true, false, 0, amrFileFormat, oneFilePerProcess); + + vtkOutput->addBeforeFunction([&]() { + for (auto& block : *blocks) + { + sweepCollection.calculateMacroscopicParameters(&block); + } + }); + + if (writeVelocity) + { + auto velWriter = make_shared< field::VTKWriter< VectorField_T, float32 > >(ids.velocityField, "velocity"); + vtkOutput->addCellDataWriter(velWriter); + } + if (writeDensity) + { + auto densityWriter = make_shared< field::VTKWriter< ScalarField_T, float32 > >(ids.densityField, "density"); + vtkOutput->addCellDataWriter(densityWriter); + } + if (writeFlag) + { + auto flagWriter = make_shared< field::VTKWriter< FlagField_T > >(ids.flagField, "flag"); + vtkOutput->addCellDataWriter(flagWriter); + } + timeloop.addFuncBeforeTimeStep(vtk::writeFiles(vtkOutput), "VTK Output"); + } + + StreamCollide streamCollide(blocks, ids, omega); + Timestep timeloopFunction(blocks, ids, streamCollide, packInfo, communication); + timeloop.addFuncBeforeTimeStep(timeloopFunction, "Refinement Cycle"); + + + // timeloop.addFuncBeforeTimeStep(streamCollide.getTimestepFunction(), "Refinement Cycle"); + // timeloop.addFuncBeforeTimeStep(streamCollide.swapPDFs(), "Swap Pointer"); + + const real_t remainingTimeLoggerFrequency = loggingParameters.getParameter< real_t >("remainingTimeLoggerFrequency", 3.0); // in seconds + if (uint_c(remainingTimeLoggerFrequency) > 0) + { + timeloop.addFuncAfterTimeStep(timing::RemainingTimeLogger(timeloop.getNrOfTimeSteps(), remainingTimeLoggerFrequency), "remaining time logger"); + } + +// const uint_t evaluationCheckFrequency = EvaluationParameters.getParameter< uint_t >("evaluationCheckFrequency"); +// const bool evaluationLogToStream = EvaluationParameters.getParameter< bool >("logToStream"); +// const bool evaluationLogToFile = EvaluationParameters.getParameter< bool >("logToFile"); +// const std::string evaluationFilename = EvaluationParameters.getParameter< std::string >("filename"); + + // std::shared_ptr< AccuracyEvaluation > evaluation = std::make_shared< AccuracyEvaluation >(blocks, ids, sweepCollection, maxLatticeVelocity, evaluationCheckFrequency, evaluationLogToStream, evaluationLogToFile); + // timeloop.addFuncBeforeTimeStep(SharedFunctor< AccuracyEvaluation >(evaluation), "evaluation"); + + ////////////////////// + /// RUN SIMULATION /// + ////////////////////// + const lbm_generated::PerformanceEvaluation< FlagField_T > performance(blocks, ids.flagField, fluidFlagUID); + field::CellCounter< FlagField_T > fluidCells(blocks, ids.flagField, fluidFlagUID); + fluidCells(); + + WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << blocks->getNumberOfBlocks()) + WALBERLA_LOG_INFO_ON_ROOT("Starting Simulation") + WcTimingPool timeloopTiming; + WcTimer simTimer; + + WALBERLA_MPI_BARRIER() +#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT) + WALBERLA_GPU_CHECK(gpuDeviceSynchronize()) + WALBERLA_GPU_CHECK(gpuPeekAtLastError()) +#endif + + simTimer.start(); + timeloop.run(timeloopTiming); +#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT) + WALBERLA_GPU_CHECK(gpuDeviceSynchronize()) + WALBERLA_GPU_CHECK(gpuPeekAtLastError()) +#endif + WALBERLA_MPI_BARRIER() + simTimer.end(); + + WALBERLA_LOG_INFO_ON_ROOT("Simulation finished") + real_t time = simTimer.max(); + WALBERLA_MPI_SECTION() { walberla::mpi::reduceInplace(time, walberla::mpi::MAX); } + performance.logResultOnRoot(timesteps, time); + + const auto reducedTimeloopTiming = timeloopTiming.getReduced(); + WALBERLA_LOG_RESULT_ON_ROOT("Time loop timing:\n" << *reducedTimeloopTiming) + + printResidentMemoryStatistics(); + + return EXIT_SUCCESS; +} diff --git a/apps/showcases/TaylorGreenVortex/TaylorGreenVortex.prm b/apps/showcases/TaylorGreenVortex/TaylorGreenVortex.prm new file mode 100644 index 0000000000000000000000000000000000000000..b0e5d68307d828c1d83d23a9386ed5ace0f734c1 --- /dev/null +++ b/apps/showcases/TaylorGreenVortex/TaylorGreenVortex.prm @@ -0,0 +1,47 @@ +Parameters +{ + omega 1.4; + timesteps 100; + maxLatticeVelocity 0.01; +} + +DomainSetup +{ + blocks < 4, 4, 4 >; + cellsPerBlock < 8, 8, 8 >; + periodic < 1, 1, 1 >; + numberProcesses 1; // This is for load balancing, overwritten if more than one proc is used +} + +Boundaries +{ + Border { direction S; walldistance -1; flag NoSlip; } + Border { direction N; walldistance -1; flag UBB; } +} + +VTKWriter +{ + vtkWriteFrequency 1; + velocity true; + density true; + averageFields false; + flag false; + writeOnlySlice false; + amrFileFormat false; + oneFilePerProcess false; +} + +Logging +{ + logLevel info; // info progress detail tracing + writeSetupForestAndReturn false; + remainingTimeLoggerFrequency 3; // in seconds +} + +Evaluation +{ + evaluationCheckFrequency 100; + logToStream true; + logToFile true; + filename Channel.txt; +} diff --git a/apps/showcases/TaylorGreenVortex/TaylorGreenVortex.py b/apps/showcases/TaylorGreenVortex/TaylorGreenVortex.py new file mode 100644 index 0000000000000000000000000000000000000000..c974c5d62e848cc9f54145034cb38ee08ef3d937 --- /dev/null +++ b/apps/showcases/TaylorGreenVortex/TaylorGreenVortex.py @@ -0,0 +1,51 @@ +import sympy as sp + +from pystencils import Target +from pystencils import fields + +from lbmpy.advanced_streaming.utility import get_timesteps +from lbmpy.boundaries import NoSlip, UBB +from lbmpy.creationfunctions import create_lb_method, create_lb_collision_rule +from lbmpy import LBMConfig, LBMOptimisation, Stencil, Method, LBStencil +from pystencils_walberla import CodeGeneration, generate_info_header +from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator + +import warnings + +warnings.filterwarnings("ignore") +with CodeGeneration() as ctx: + target = Target.CPU # Target.GPU if ctx.cuda else Target.CPU + data_type = "float64" if ctx.double_accuracy else "float32" + pdf_dtype = "float64" + + streaming_pattern = 'pull' + timesteps = get_timesteps(streaming_pattern) + + omega = sp.symbols("omega") + + stencil = LBStencil(Stencil.D3Q27) + dim = stencil.D + pdfs, pdfs_tmp = fields(f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {pdf_dtype}[3D]", layout='fzyx') + velocity_field, density_field = fields(f"velocity({dim}), density(1) : {data_type}[{dim}D]", layout='fzyx') + macroscopic_fields = {'density': density_field, 'velocity': velocity_field} + + lbm_config = LBMConfig(stencil=stencil, method=Method.TRT, relaxation_rate=omega, + streaming_pattern=streaming_pattern, compressible=True) + lbm_opt = LBMOptimisation(cse_global=False, field_layout='fzyx') + + method = create_lb_method(lbm_config=lbm_config) + collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt) + + no_slip = lbm_boundary_generator(class_name='NoSlip', flag_uid='NoSlip', + boundary_object=NoSlip()) + + ubb = lbm_boundary_generator(class_name='UBB', flag_uid='UBB', + boundary_object=UBB([sp.Symbol("u_x"), 0, 0], data_type=data_type)) + + generate_lbm_package(ctx, name="TaylorGreenVortex", + collision_rule=collision_rule, + lbm_config=lbm_config, lbm_optimisation=lbm_opt, + nonuniform=False, boundaries=[no_slip, ubb], + macroscopic_fields=macroscopic_fields, data_type=data_type) + + generate_info_header(ctx, 'TaylorGreenVortexHeader')