From bc5f41faef00ae22fcb64ca8a646a62879d9548f Mon Sep 17 00:00:00 2001
From: Markus Holzer <markus.holzer@fau.de>
Date: Tue, 17 Oct 2023 12:24:29 +0200
Subject: [PATCH] Added test case

---
 .../GridRefSphere/BoundaryCollection.h        |  120 +
 apps/benchmarks/GridRefSphere/CMakeLists.txt  |   16 +
 apps/benchmarks/GridRefSphere/FreeSlip.cu     |  145 +
 apps/benchmarks/GridRefSphere/FreeSlip.h      | 1120 ++++
 apps/benchmarks/GridRefSphere/NoSlip.cu       |  135 +
 apps/benchmarks/GridRefSphere/NoSlip.h        |  527 ++
 apps/benchmarks/GridRefSphere/Outflow.cu      |  147 +
 apps/benchmarks/GridRefSphere/Outflow.h       |  296 +
 apps/benchmarks/GridRefSphere/README          |   27 +
 apps/benchmarks/GridRefSphere/SPHERE.prm      |   31 +
 .../GridRefSphere/StorageSpecification.cu     | 5330 +++++++++++++++++
 .../GridRefSphere/StorageSpecification.h      |  190 +
 .../GridRefSphere/SweepCollection.cu          |  848 +++
 .../GridRefSphere/SweepCollection.h           | 1428 +++++
 apps/benchmarks/GridRefSphere/UBB.cu          |  146 +
 apps/benchmarks/GridRefSphere/UBB.h           |  530 ++
 .../GridRefSphere/gridRef_SPHERE.py           |   64 +
 .../GridRefSphere/refGrid_SPHERE.cpp          |  235 +
 apps/benchmarks/GridRefSphere/sphere.obj      |  218 +
 19 files changed, 11553 insertions(+)
 create mode 100644 apps/benchmarks/GridRefSphere/BoundaryCollection.h
 create mode 100644 apps/benchmarks/GridRefSphere/CMakeLists.txt
 create mode 100644 apps/benchmarks/GridRefSphere/FreeSlip.cu
 create mode 100644 apps/benchmarks/GridRefSphere/FreeSlip.h
 create mode 100644 apps/benchmarks/GridRefSphere/NoSlip.cu
 create mode 100644 apps/benchmarks/GridRefSphere/NoSlip.h
 create mode 100644 apps/benchmarks/GridRefSphere/Outflow.cu
 create mode 100644 apps/benchmarks/GridRefSphere/Outflow.h
 create mode 100644 apps/benchmarks/GridRefSphere/README
 create mode 100644 apps/benchmarks/GridRefSphere/SPHERE.prm
 create mode 100644 apps/benchmarks/GridRefSphere/StorageSpecification.cu
 create mode 100644 apps/benchmarks/GridRefSphere/StorageSpecification.h
 create mode 100644 apps/benchmarks/GridRefSphere/SweepCollection.cu
 create mode 100644 apps/benchmarks/GridRefSphere/SweepCollection.h
 create mode 100644 apps/benchmarks/GridRefSphere/UBB.cu
 create mode 100644 apps/benchmarks/GridRefSphere/UBB.h
 create mode 100644 apps/benchmarks/GridRefSphere/gridRef_SPHERE.py
 create mode 100644 apps/benchmarks/GridRefSphere/refGrid_SPHERE.cpp
 create mode 100644 apps/benchmarks/GridRefSphere/sphere.obj

diff --git a/apps/benchmarks/GridRefSphere/BoundaryCollection.h b/apps/benchmarks/GridRefSphere/BoundaryCollection.h
new file mode 100644
index 000000000..26b312137
--- /dev/null
+++ b/apps/benchmarks/GridRefSphere/BoundaryCollection.h
@@ -0,0 +1,120 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file BoundaryCollection.h
+//! \\author lbmpy
+//======================================================================================================================
+
+#pragma once
+
+#include "core/DataTypes.h"
+#include "domain_decomposition/IBlock.h"
+
+#include "gpu/GPUWrapper.h"
+
+#include "NoSlip.h"
+#include "Outflow.h"
+#include "FreeSlip.h"
+#include "UBB.h"
+
+
+
+namespace walberla{
+namespace lbm {
+
+template <typename FlagField_T>
+class BoundaryCollection
+{
+ public:
+   //! Selects whether getSweep() hands out the run(), inner() or outer() variant.
+   enum Type { ALL = 0, INNER = 1, OUTER = 2 };
+
+   //! Creates the NoSlip, Outflow, FreeSlip and UBB handlers and fills each one from the flag field.
+   //! Hand-modified w.r.t. the generated code: \p pdfcpu (CPU PDF field ID) precedes \p pdfsID_ (GPU PDF field ID).
+   BoundaryCollection(const shared_ptr<StructuredBlockForest> & blocks, BlockDataID flagID_, BlockDataID pdfcpu, BlockDataID pdfsID_, FlagUID domainUID_, double in_v_x, double in_v_y, double in_v_z)
+      : blocks_(blocks), flagID(flagID_), pdfsID(pdfsID_), domainUID(domainUID_)
+   {
+      NoSlipObject = std::make_shared< lbm::NoSlip >(blocks, pdfsID_);
+      // Unlike the generated code, Outflow takes three arguments: the GPU field ID second, the CPU field ID third.
+      OutflowObject = std::make_shared< lbm::Outflow >(blocks, pdfsID_, pdfcpu);
+      FreeSlipObject = std::make_shared< lbm::FreeSlip >(blocks, pdfsID_);
+      UBBObject = std::make_shared< lbm::UBB >(blocks, pdfsID_, in_v_x, in_v_y, in_v_z);
+
+      // Fill every handler from the flag field, keyed by the flag name of its boundary type.
+      NoSlipObject->fillFromFlagField<FlagField_T>(blocks, flagID_, walberla::FlagUID("NoSlip"), domainUID_);
+      OutflowObject->fillFromFlagField<FlagField_T>(blocks, flagID_, walberla::FlagUID("Outflow"), domainUID_);
+      FreeSlipObject->fillFromFlagField<FlagField_T>(blocks, flagID_, walberla::FlagUID("FreeSlip"), domainUID_);
+      UBBObject->fillFromFlagField<FlagField_T>(blocks, flagID_, walberla::FlagUID("UBB"), domainUID_);
+   }
+
+   //! Calls run() of all four handlers on \p block (order: NoSlip, Outflow, FreeSlip, UBB).
+   void run (IBlock * block, gpuStream_t stream = nullptr)
+   {
+      forEachBoundary([block, stream](auto & handler) { handler.run(block, stream); });
+   }
+
+   //! Calls inner() of all four handlers on \p block (same order as run()).
+   void inner (IBlock * block, gpuStream_t stream = nullptr)
+   {
+      forEachBoundary([block, stream](auto & handler) { handler.inner(block, stream); });
+   }
+
+   //! Calls outer() of all four handlers on \p block (same order as run()).
+   void outer (IBlock * block, gpuStream_t stream = nullptr)
+   {
+      forEachBoundary([block, stream](auto & handler) { handler.outer(block, stream); });
+   }
+
+   //! Call operator; identical to run().
+   void operator() (IBlock * block, gpuStream_t stream = nullptr)
+   {
+      this->run(block, stream);
+   }
+
+   //! Returns a per-block functor bound to \p stream: inner() for INNER, outer() for OUTER, run() otherwise.
+   //! The functor captures this, so it must not be used after this object has been destroyed.
+   std::function<void (IBlock *)> getSweep(Type type = Type::ALL, gpuStream_t stream = nullptr)
+   {
+      if (type == Type::INNER)
+         return [this, stream](IBlock* block) { this->inner(block, stream); };
+      if (type == Type::OUTER)
+         return [this, stream](IBlock* block) { this->outer(block, stream); };
+      return [this, stream](IBlock* block) { this->run(block, stream); };
+   }
+
+   //! Invokes \p action once per handler, in the fixed order NoSlip, Outflow, FreeSlip, UBB.
+   template <typename Action_T>
+   void forEachBoundary(Action_T && action)
+   {
+      action(*NoSlipObject);
+      action(*OutflowObject);
+      action(*FreeSlipObject);
+      action(*UBBObject);
+   }
+
+   weak_ptr< StructuredBlockStorage > blocks_;
+   BlockDataID flagID;
+   BlockDataID pdfsID;
+   walberla::FlagUID domainUID;
+
+   shared_ptr<lbm::NoSlip> NoSlipObject;
+   shared_ptr<lbm::Outflow> OutflowObject;
+   shared_ptr<lbm::FreeSlip> FreeSlipObject;
+   shared_ptr<lbm::UBB> UBBObject;
+   
+};
+
+}
+}
diff --git a/apps/benchmarks/GridRefSphere/CMakeLists.txt b/apps/benchmarks/GridRefSphere/CMakeLists.txt
new file mode 100644
index 000000000..266f50ae7
--- /dev/null
+++ b/apps/benchmarks/GridRefSphere/CMakeLists.txt
@@ -0,0 +1,16 @@
+waLBerla_link_files_to_builddir( *.prm )
+waLBerla_link_files_to_builddir( *.obj )
+# Code generation is switched off below; the files it would produce are listed directly in the executable's FILES.
+#walberla_generate_target_from_python(NAME gridRef_lib
+#	FILE gridRef_SPHERE.py
+#	OUT_FILES  StorageSpecification.cu   StorageSpecification.h
+#                   SweepCollection.cu        SweepCollection.h
+#	           NoSlip.cu                 NoSlip.h
+#	   	   Outflow.cu                Outflow.h
+#	   	   FreeSlip.cu               FreeSlip.h
+#	   	   UBB.cu                    UBB.h
+#		   BoundaryCollection.h) 
+# Benchmark executable: the driver refGrid_SPHERE.cpp plus the boundary, sweep and storage-specification sources.
+walberla_add_executable ( NAME GRID_REF_SPHERE
+	FILES refGrid_SPHERE.cpp BoundaryCollection.h NoSlip.cu NoSlip.h Outflow.cu Outflow.h FreeSlip.cu FreeSlip.h UBB.cu UBB.h SweepCollection.h SweepCollection.cu StorageSpecification.h StorageSpecification.cu 
+        DEPENDS blockforest boundary core domain_decomposition field geometry gpu timeloop vtk mesh)
diff --git a/apps/benchmarks/GridRefSphere/FreeSlip.cu b/apps/benchmarks/GridRefSphere/FreeSlip.cu
new file mode 100644
index 000000000..77c733ba2
--- /dev/null
+++ b/apps/benchmarks/GridRefSphere/FreeSlip.cu
@@ -0,0 +1,145 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file FreeSlip.cu
+//! \\author pystencils
+//======================================================================================================================
+
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+#include "FreeSlip.h"
+#include "gpu/ErrorChecking.h"
+
+
+#define FUNC_PREFIX __global__
+
+using namespace std;
+
+namespace walberla {
+namespace lbm {
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 177
+#else
+#pragma diag_suppress 177
+#endif
+#endif
+//NOLINTBEGIN(readability-non-const-parameter*)
+namespace internal_freeslip_even {
+static FUNC_PREFIX __launch_bounds__(256) void freeslip_even(uint8_t * RESTRICT const _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
+{
+   // Copies, for every index-vector entry, one PDF value from a source cell/slot to a target cell/slot of _data_pdfs.
+   const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13 }; 
+   const int32_t f_in_inv_offsets_x [] = { 0,0,0,-1,1,0,0,-1,1,-1,1,0,0,-1,1,0,0,-1,1 }; 
+   const int32_t f_in_inv_offsets_y [] = { 0,1,-1,0,0,0,0,1,1,-1,-1,1,-1,0,0,1,-1,0,0 }; 
+   const int32_t f_in_inv_offsets_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1 }; 
+   // Entry layout (32 bytes, eight int32): x, y, z, dir at byte offsets 0/4/8/12, followed by the values at 16/20/24/28
+   // (IndexInfo::wnx, wny, wnz, ref_dir). Target: cell (x,y,z) + f_in_inv_offsets_*[dir], slot f_in_inv_dir_idx[dir].
+   // Source: cell (x,y,z) + (wnx,wny,wnz) + neighbour_offset_*[dir], slot ref_dir. "Slot" = index along _stride_pdfs_3.
+   const int32_t neighbour_offset_x [] = { 0,0,0,-1,1,0,0,-1,1,-1,1,0,0,-1,1,0,0,-1,1 }; 
+   const int32_t neighbour_offset_y [] = { 0,1,-1,0,0,0,0,1,1,-1,-1,1,-1,0,0,1,-1,0,0 }; 
+   const int32_t neighbour_offset_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1 }; 
+   // One thread per entry; threads whose global index is >= indexVectorSize do nothing.
+   if (blockDim.x*blockIdx.x + threadIdx.x < indexVectorSize)
+   {
+      uint8_t * RESTRICT _data_indexVector_10 = _data_indexVector;
+      const int64_t x = *((int32_t * )(& _data_indexVector_10[32*blockDim.x*blockIdx.x + 32*threadIdx.x]));
+      uint8_t * RESTRICT _data_indexVector_14 = _data_indexVector + 4;
+      const int64_t y = *((int32_t * )(& _data_indexVector_14[32*blockDim.x*blockIdx.x + 32*threadIdx.x]));
+      uint8_t * RESTRICT _data_indexVector_18 = _data_indexVector + 8;
+      const int64_t z = *((int32_t * )(& _data_indexVector_18[32*blockDim.x*blockIdx.x + 32*threadIdx.x]));
+      uint8_t * RESTRICT _data_indexVector_112 = _data_indexVector + 12;
+      const int32_t dir = *((int32_t * )(& _data_indexVector_112[32*blockDim.x*blockIdx.x + 32*threadIdx.x]));
+      double * RESTRICT  _data_pdfs700fc22ba4e33a75 = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir];
+      uint8_t * RESTRICT _data_indexVector_116 = _data_indexVector + 16;
+      uint8_t * RESTRICT _data_indexVector_120 = _data_indexVector + 20;
+      uint8_t * RESTRICT _data_indexVector_124 = _data_indexVector + 24;
+      uint8_t * RESTRICT _data_indexVector_128 = _data_indexVector + 28;
+      double * RESTRICT  _data_pdfsedc4b2497fcff986 = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_1*(*((int32_t * )(& _data_indexVector_120[32*blockDim.x*blockIdx.x + 32*threadIdx.x])) + neighbour_offset_y[dir]) + _stride_pdfs_2*z + _stride_pdfs_2*(*((int32_t * )(& _data_indexVector_124[32*blockDim.x*blockIdx.x + 32*threadIdx.x])) + neighbour_offset_z[dir]) + _stride_pdfs_3**((int32_t * )(& _data_indexVector_128[32*blockDim.x*blockIdx.x + 32*threadIdx.x]));
+      _data_pdfs700fc22ba4e33a75[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir]] = _data_pdfsedc4b2497fcff986[_stride_pdfs_0*x + _stride_pdfs_0*(*((int32_t * )(& _data_indexVector_116[32*blockDim.x*blockIdx.x + 32*threadIdx.x])) + neighbour_offset_x[dir])];
+   } 
+}
+}
+
+//NOLINTEND(readability-non-const-parameter*)
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __CUDACC__
+#pragma pop
+#endif
+
+
+// Launches the free-slip kernel for the index list of the requested type on one block; no-op if that list is empty.
+void FreeSlip::run_impl(IBlock * block, IndexVectors::Type type, gpuStream_t stream)
+{
+   auto * indexVectors = block->getData<IndexVectors>(indexVectorID);
+   const int32_t indexVectorSize = int32_c( indexVectors->indexVector(type).size() );
+   if( indexVectorSize == 0)
+      return;
+
+   // The kernel addresses the index entries byte-wise, hence the uint8_t view of the device array.
+   uint8_t * _data_indexVector = reinterpret_cast<uint8_t*>(indexVectors->pointerGpu(type));
+
+   auto pdfs = block->getData< gpu::GPUField<double> >(pdfsID);
+   const uint8_t timestep = pdfs->getTimestep();
+
+   WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers())); // explicit ';' so this stays a statement however the macro expands
+   double * RESTRICT  _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+
+   // Launch configuration: at most 256 threads per block and ceil(indexVectorSize / threadsPerBlock) blocks.
+   const int32_t threadsPerBlock = (256 < indexVectorSize) ? 256 : indexVectorSize;
+   const int64_t numBlocks = (int64_t(indexVectorSize) + int64_t(threadsPerBlock) - 1) / int64_t(threadsPerBlock);
+   const dim3 _block(uint32_c(threadsPerBlock), uint32_c(1), uint32_c(1));
+   const dim3 _grid(uint32_c(numBlocks), uint32_c(1), uint32_c(1));
+   if(((timestep & 1) ^ 1)) { // even timestep; the odd branch below launches the same kernel
+      internal_freeslip_even::freeslip_even<<<_grid, _block, 0, stream>>>(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, indexVectorSize);
+   } else {
+      internal_freeslip_even::freeslip_even<<<_grid, _block, 0, stream>>>(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, indexVectorSize);
+   }
+}
+
+void FreeSlip::run(IBlock * block, gpuStream_t stream)
+{ // Applies the boundary to every entry of the block's ALL index list.
+   run_impl(block, IndexVectors::ALL, stream);
+}
+
+void FreeSlip::inner(IBlock * block, gpuStream_t stream)
+{ // Applies the boundary to the block's INNER index list only.
+   run_impl(block, IndexVectors::INNER, stream);
+}
+
+void FreeSlip::outer(IBlock * block, gpuStream_t stream)
+{ // Applies the boundary to the block's OUTER index list only.
+   run_impl(block, IndexVectors::OUTER, stream);
+}
+
+} // namespace lbm
+} // namespace walberla
+
diff --git a/apps/benchmarks/GridRefSphere/FreeSlip.h b/apps/benchmarks/GridRefSphere/FreeSlip.h
new file mode 100644
index 000000000..236d8863e
--- /dev/null
+++ b/apps/benchmarks/GridRefSphere/FreeSlip.h
@@ -0,0 +1,1120 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file FreeSlip.h
+//! \\author pystencils
+//======================================================================================================================
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "gpu/FieldCopy.h"
+#include "gpu/GPUField.h"
+#include "gpu/GPUWrapper.h"
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "blockforest/StructuredBlockForest.h"
+#include "field/FlagField.h"
+#include "core/debug/Debug.h"
+
+#include <set>
+#include <vector>
+
+
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace lbm {
+
+
+class FreeSlip
+{
+public:
+    //! One boundary link as read by the GPU kernel: eight consecutive int32 values (32 bytes).
+    struct IndexInfo {
+        int32_t x, y, z;          //!< cell coordinates given to the constructor
+        int32_t dir;              //!< stencil direction index given to the constructor
+        int32_t wnx, wny, wnz;    //!< start at zero; assigned afterwards by fillFromFlagField
+        int32_t ref_dir;          //!< starts at zero; assigned afterwards by fillFromFlagField
+        IndexInfo(int32_t x_, int32_t y_, int32_t z_, int32_t dir_)
+            : x(x_), y(y_), z(z_), dir(dir_), wnx(0), wny(0), wnz(0), ref_dir(0) {}
+        //! Member-wise equality over all eight fields.
+        bool operator==(const IndexInfo & rhs) const {
+            if( x != rhs.x || y != rhs.y || z != rhs.z || dir != rhs.dir ) return false;
+            return wnx == rhs.wnx && wny == rhs.wny && wnz == rhs.wnz && ref_dir == rhs.ref_dir;
+        }
+    };
+
+
+
+    //! Host-side index lists (ALL / INNER / OUTER) of one block together with their device copies.
+    //! Owns the device buffers: they are (re)allocated by syncGPU() and released in the destructor.
+    //! NOTE(review): a copy would duplicate the raw device pointers and free them twice - do not copy instances.
+    class IndexVectors
+    {
+    public:
+        using CpuIndexVector = std::vector<IndexInfo>;
+
+        enum Type { ALL = 0, INNER = 1, OUTER = 2, NUM_TYPES = 3 };
+
+        IndexVectors() = default;
+        //! Compares the host-side lists only; device buffers are not inspected.
+        bool operator==(IndexVectors const &other) const { return other.cpuVectors_ == cpuVectors_; }
+
+        ~IndexVectors() { freeGpuVectors(); }
+
+        CpuIndexVector & indexVector(Type t) { return cpuVectors_[t]; }
+        IndexInfo * pointerCpu(Type t)  { return cpuVectors_[t].data(); }
+        //! Device pointer of list \p t; the device list is empty until syncGPU() has been called.
+        IndexInfo * pointerGpu(Type t)  { return gpuVectors_[t]; }
+
+        //! Releases the current device buffers and uploads a fresh copy of every host list.
+        void syncGPU()
+        {
+            freeGpuVectors();
+            gpuVectors_.resize( cpuVectors_.size() );
+
+            WALBERLA_ASSERT_EQUAL(cpuVectors_.size(), NUM_TYPES);
+            for(size_t i=0; i < cpuVectors_.size(); ++i )
+            {
+                auto & gpuVec = gpuVectors_[i];
+                auto & cpuVec = cpuVectors_[i];
+                // data() rather than &cpuVec[0]: subscripting an empty list is undefined behaviour.
+                WALBERLA_GPU_CHECK(gpuMalloc( &gpuVec, sizeof(IndexInfo) * cpuVec.size() ));
+                WALBERLA_GPU_CHECK(gpuMemcpy( gpuVec, cpuVec.data(), sizeof(IndexInfo) * cpuVec.size(), gpuMemcpyHostToDevice ));
+            }
+        }
+
+    private:
+        void freeGpuVectors() { for( auto & gpuVec: gpuVectors_) WALBERLA_GPU_CHECK(gpuFree( gpuVec )); }
+
+        std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
+        using GpuIndexVector = IndexInfo *;
+        std::vector<GpuIndexVector> gpuVectors_;
+    };
+
+    //! Registers an (initially empty) IndexVectors object per block as "IndexField_FreeSlip"; the ID goes to indexVectorID.
+    FreeSlip( const shared_ptr<StructuredBlockForest> & blocks, BlockDataID pdfsID_)
+        : pdfsID(pdfsID_)
+    {
+        auto createIdxVector = []( IBlock * const , StructuredBlockStorage * const ) { return new IndexVectors(); };
+        indexVectorID = blocks->addStructuredBlockData< IndexVectors >( createIdxVector, "IndexField_FreeSlip");
+    }
+
+    void run (IBlock * block, gpuStream_t stream = nullptr); // defined in FreeSlip.cu: processes the ALL index list
+
+    void operator() (IBlock * block, gpuStream_t stream = nullptr) // call operator; same as run()
+    {
+        run(block, stream);
+    }
+
+    void inner (IBlock * block, gpuStream_t stream = nullptr); // defined in FreeSlip.cu: processes the INNER index list
+
+    void outer (IBlock * block, gpuStream_t stream = nullptr); // defined in FreeSlip.cu: processes the OUTER index list
+
+    //! Returns a functor that calls run() on a block using \p stream; it captures this and must not outlive the object.
+    std::function<void (IBlock *)> getSweep(gpuStream_t stream = nullptr)
+    {
+        auto sweep = [this, stream](IBlock * blk) { this->run(blk, stream); };
+        return sweep;
+    }
+
+    //! Returns a functor that calls inner() on a block using \p stream; it captures this and must not outlive the object.
+    std::function<void (IBlock *)> getInnerSweep(gpuStream_t stream = nullptr)
+    {
+        auto sweep = [this, stream](IBlock * blk) { this->inner(blk, stream); };
+        return sweep;
+    }
+
+    //! Returns a functor that calls outer() on a block using \p stream; it captures this and must not outlive the object.
+    std::function<void (IBlock *)> getOuterSweep(gpuStream_t stream = nullptr)
+    {
+        auto sweep = [this, stream](IBlock * blk) { this->outer(blk, stream); };
+        return sweep;
+    }
+
+    //! Calls the per-block fillFromFlagField overload for every block that \p blocks iterates over.
+    template<typename FlagField_T>
+    void fillFromFlagField( const shared_ptr<StructuredBlockForest> & blocks, ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID, FlagUID domainFlagUID)
+    {
+        for( auto & currentBlock : *blocks )
+            fillFromFlagField<FlagField_T>(&currentBlock, flagFieldID, boundaryFlagUID, domainFlagUID );
+    }
+
+
+    template<typename FlagField_T>
+    void fillFromFlagField(IBlock * block, ConstBlockDataID flagFieldID,
+                            FlagUID boundaryFlagUID, FlagUID domainFlagUID )
+    {
+        auto * indexVectors = block->getData< IndexVectors > ( indexVectorID );
+        auto & indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+        auto & indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+        auto & indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+        auto * flagField = block->getData< FlagField_T > ( flagFieldID );
+        
+
+        if( !(flagField->flagExists(boundaryFlagUID) && flagField->flagExists(domainFlagUID) ))
+            return;
+
+        auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
+        auto domainFlag = flagField->getFlag(domainFlagUID);
+
+        auto inner = flagField->xyzSize();
+        inner.expand( cell_idx_t(-1) );
+
+        indexVectorAll.clear();
+        indexVectorInner.clear();
+        indexVectorOuter.clear();
+
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 0, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  0 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 };
+                const Cell n = it.cell() + Cell(0, 0, 0);
+                int32_t ref_dir = 0; // dir: 0
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 0, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 0;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 0, n.z(), domainFlag ) )
+                {
+                   element.wny = 0;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 0, domainFlag ) )
+                {
+                   element.wnz = 0;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 0;
+                   element.wny = 0;
+                   element.wnz = 0;
+                   ref_dir = 0;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  1 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 };
+                const Cell n = it.cell() + Cell(0, 1, 0);
+                int32_t ref_dir = 2; // dir: 1
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 0, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 0;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + -1, n.z(), domainFlag ) )
+                {
+                   element.wny = -1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 0, domainFlag ) )
+                {
+                   element.wnz = 0;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 0;
+                   element.wny = -1;
+                   element.wnz = 0;
+                   ref_dir = 1;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, -1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  2 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 };
+                const Cell n = it.cell() + Cell(0, -1, 0);
+                int32_t ref_dir = 1; // dir: 2
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 0, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 0;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 1, n.z(), domainFlag ) )
+                {
+                   element.wny = 1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 0, domainFlag ) )
+                {
+                   element.wnz = 0;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 0;
+                   element.wny = 1;
+                   element.wnz = 0;
+                   ref_dir = 2;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 0, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  3 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 };
+                const Cell n = it.cell() + Cell(-1, 0, 0);
+                int32_t ref_dir = 4; // dir: 3
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 1, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 0, n.z(), domainFlag ) )
+                {
+                   element.wny = 0;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 0, domainFlag ) )
+                {
+                   element.wnz = 0;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 1;
+                   element.wny = 0;
+                   element.wnz = 0;
+                   ref_dir = 3;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) ) // only cells carrying domainFlag are processed
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 0, 0 , 0 ), boundaryFlag ) ) // neighbor at offset (1, 0, 0) carries boundaryFlag
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  4 ); // index entry for this cell, direction index 4
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 }; // 19-entry lookup applied to ref_dir when the x test succeeds
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 }; // same, for the y test
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 }; // same, for the z test
+                const Cell n = it.cell() + Cell(1, 0, 0); // n: this cell shifted by the offset tested above
+                int32_t ref_dir = 3; // start value for dir 4; remapped by every axis test below that succeeds
+                element.wnx = 0; // compute discrete normal vector of free slip wall; components start at 0
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + -1, n.y(), n.z(), domainFlag ) ) // x test: cell at n + (-1, 0, 0) against domainFlag
+                {
+                   element.wnx = -1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 0, n.z(), domainFlag ) ) // y test: offset 0, i.e. n itself
+                {
+                   element.wny = 0; // stays 0; only ref_dir is remapped
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 0, domainFlag ) ) // z test: offset 0, i.e. n itself
+                {
+                   element.wnz = 0; // stays 0; only ref_dir is remapped
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid): no test above produced a non-zero normal component
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = -1; // fallback normal is the negated neighbor offset
+                   element.wny = 0;
+                   element.wnz = 0;
+                   ref_dir = 4; // fallback: the direction index itself, discarding any remapping above
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element ); // every entry is stored in indexVectorAll
+              if( inner.contains( it.x(), it.y(), it.z() ) ) // and additionally in exactly one of inner / outer
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) ) // only cells carrying domainFlag are processed
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 0, 1 , 0 ), boundaryFlag ) ) // neighbor at offset (0, 0, 1) carries boundaryFlag
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  5 ); // index entry for this cell, direction index 5
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 }; // 19-entry lookup applied to ref_dir when the x test succeeds
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 }; // same, for the y test
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 }; // same, for the z test
+                const Cell n = it.cell() + Cell(0, 0, 1); // n: this cell shifted by the offset tested above
+                int32_t ref_dir = 6; // start value for dir 5; remapped by every axis test below that succeeds
+                element.wnx = 0; // compute discrete normal vector of free slip wall; components start at 0
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 0, n.y(), n.z(), domainFlag ) ) // x test: offset 0, i.e. n itself
+                {
+                   element.wnx = 0; // stays 0; only ref_dir is remapped
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 0, n.z(), domainFlag ) ) // y test: offset 0, i.e. n itself
+                {
+                   element.wny = 0; // stays 0; only ref_dir is remapped
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + -1, domainFlag ) ) // z test: cell at n + (0, 0, -1) against domainFlag
+                {
+                   element.wnz = -1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid): no test above produced a non-zero normal component
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 0; // fallback normal is the negated neighbor offset
+                   element.wny = 0;
+                   element.wnz = -1;
+                   ref_dir = 5; // fallback: the direction index itself, discarding any remapping above
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element ); // every entry is stored in indexVectorAll
+              if( inner.contains( it.x(), it.y(), it.z() ) ) // and additionally in exactly one of inner / outer
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) ) // only cells carrying domainFlag are processed
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 0, -1 , 0 ), boundaryFlag ) ) // neighbor at offset (0, 0, -1) carries boundaryFlag
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  6 ); // index entry for this cell, direction index 6
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 }; // 19-entry lookup applied to ref_dir when the x test succeeds
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 }; // same, for the y test
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 }; // same, for the z test
+                const Cell n = it.cell() + Cell(0, 0, -1); // n: this cell shifted by the offset tested above
+                int32_t ref_dir = 5; // start value for dir 6; remapped by every axis test below that succeeds
+                element.wnx = 0; // compute discrete normal vector of free slip wall; components start at 0
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 0, n.y(), n.z(), domainFlag ) ) // x test: offset 0, i.e. n itself
+                {
+                   element.wnx = 0; // stays 0; only ref_dir is remapped
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 0, n.z(), domainFlag ) ) // y test: offset 0, i.e. n itself
+                {
+                   element.wny = 0; // stays 0; only ref_dir is remapped
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 1, domainFlag ) ) // z test: cell at n + (0, 0, 1) against domainFlag
+                {
+                   element.wnz = 1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid): no test above produced a non-zero normal component
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 0; // fallback normal is the negated neighbor offset
+                   element.wny = 0;
+                   element.wnz = 1;
+                   ref_dir = 6; // fallback: the direction index itself, discarding any remapping above
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element ); // every entry is stored in indexVectorAll
+              if( inner.contains( it.x(), it.y(), it.z() ) ) // and additionally in exactly one of inner / outer
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) ) // only cells carrying domainFlag are processed
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 1, 0 , 0 ), boundaryFlag ) ) // neighbor at offset (-1, 1, 0) carries boundaryFlag
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  7 ); // index entry for this cell, direction index 7
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 }; // 19-entry lookup applied to ref_dir when the x test succeeds
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 }; // same, for the y test
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 }; // same, for the z test
+                const Cell n = it.cell() + Cell(-1, 1, 0); // n: this cell shifted by the offset tested above
+                int32_t ref_dir = 10; // start value for dir 7; remapped by every axis test below that succeeds
+                element.wnx = 0; // compute discrete normal vector of free slip wall; components start at 0
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 1, n.y(), n.z(), domainFlag ) ) // x test: cell at n + (1, 0, 0) against domainFlag
+                {
+                   element.wnx = 1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + -1, n.z(), domainFlag ) ) // y test: cell at n + (0, -1, 0) against domainFlag
+                {
+                   element.wny = -1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 0, domainFlag ) ) // z test: offset 0, i.e. n itself
+                {
+                   element.wnz = 0; // stays 0; only ref_dir is remapped
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid): no test above produced a non-zero normal component
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 1; // fallback normal is the negated neighbor offset
+                   element.wny = -1;
+                   element.wnz = 0;
+                   ref_dir = 7; // fallback: the direction index itself, discarding any remapping above
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element ); // every entry is stored in indexVectorAll
+              if( inner.contains( it.x(), it.y(), it.z() ) ) // and additionally in exactly one of inner / outer
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) ) // only cells carrying domainFlag are processed
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 1, 0 , 0 ), boundaryFlag ) ) // neighbor at offset (1, 1, 0) carries boundaryFlag
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  8 ); // index entry for this cell, direction index 8
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 }; // 19-entry lookup applied to ref_dir when the x test succeeds
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 }; // same, for the y test
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 }; // same, for the z test
+                const Cell n = it.cell() + Cell(1, 1, 0); // n: this cell shifted by the offset tested above
+                int32_t ref_dir = 9; // start value for dir 8; remapped by every axis test below that succeeds
+                element.wnx = 0; // compute discrete normal vector of free slip wall; components start at 0
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + -1, n.y(), n.z(), domainFlag ) ) // x test: cell at n + (-1, 0, 0) against domainFlag
+                {
+                   element.wnx = -1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + -1, n.z(), domainFlag ) ) // y test: cell at n + (0, -1, 0) against domainFlag
+                {
+                   element.wny = -1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 0, domainFlag ) ) // z test: offset 0, i.e. n itself
+                {
+                   element.wnz = 0; // stays 0; only ref_dir is remapped
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid): no test above produced a non-zero normal component
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = -1; // fallback normal is the negated neighbor offset
+                   element.wny = -1;
+                   element.wnz = 0;
+                   ref_dir = 8; // fallback: the direction index itself, discarding any remapping above
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element ); // every entry is stored in indexVectorAll
+              if( inner.contains( it.x(), it.y(), it.z() ) ) // and additionally in exactly one of inner / outer
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) ) // only cells carrying domainFlag are processed
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, -1, 0 , 0 ), boundaryFlag ) ) // neighbor at offset (-1, -1, 0) carries boundaryFlag
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  9 ); // index entry for this cell, direction index 9
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 }; // 19-entry lookup applied to ref_dir when the x test succeeds
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 }; // same, for the y test
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 }; // same, for the z test
+                const Cell n = it.cell() + Cell(-1, -1, 0); // n: this cell shifted by the offset tested above
+                int32_t ref_dir = 8; // start value for dir 9; remapped by every axis test below that succeeds
+                element.wnx = 0; // compute discrete normal vector of free slip wall; components start at 0
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 1, n.y(), n.z(), domainFlag ) ) // x test: cell at n + (1, 0, 0) against domainFlag
+                {
+                   element.wnx = 1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 1, n.z(), domainFlag ) ) // y test: cell at n + (0, 1, 0) against domainFlag
+                {
+                   element.wny = 1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 0, domainFlag ) ) // z test: offset 0, i.e. n itself
+                {
+                   element.wnz = 0; // stays 0; only ref_dir is remapped
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid): no test above produced a non-zero normal component
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 1; // fallback normal is the negated neighbor offset
+                   element.wny = 1;
+                   element.wnz = 0;
+                   ref_dir = 9; // fallback: the direction index itself, discarding any remapping above
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element ); // every entry is stored in indexVectorAll
+              if( inner.contains( it.x(), it.y(), it.z() ) ) // and additionally in exactly one of inner / outer
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) ) // only cells carrying domainFlag are processed
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, -1, 0 , 0 ), boundaryFlag ) ) // neighbor at offset (1, -1, 0) carries boundaryFlag
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  10 ); // index entry for this cell, direction index 10
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 }; // 19-entry lookup applied to ref_dir when the x test succeeds
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 }; // same, for the y test
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 }; // same, for the z test
+                const Cell n = it.cell() + Cell(1, -1, 0); // n: this cell shifted by the offset tested above
+                int32_t ref_dir = 7; // start value for dir 10; remapped by every axis test below that succeeds
+                element.wnx = 0; // compute discrete normal vector of free slip wall; components start at 0
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + -1, n.y(), n.z(), domainFlag ) ) // x test: cell at n + (-1, 0, 0) against domainFlag
+                {
+                   element.wnx = -1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 1, n.z(), domainFlag ) ) // y test: cell at n + (0, 1, 0) against domainFlag
+                {
+                   element.wny = 1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 0, domainFlag ) ) // z test: offset 0, i.e. n itself
+                {
+                   element.wnz = 0; // stays 0; only ref_dir is remapped
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid): no test above produced a non-zero normal component
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = -1; // fallback normal is the negated neighbor offset
+                   element.wny = 1;
+                   element.wnz = 0;
+                   ref_dir = 10; // fallback: the direction index itself, discarding any remapping above
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element ); // every entry is stored in indexVectorAll
+              if( inner.contains( it.x(), it.y(), it.z() ) ) // and additionally in exactly one of inner / outer
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) ) // only cells carrying domainFlag are processed
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 1, 1 , 0 ), boundaryFlag ) ) // neighbor at offset (0, 1, 1) carries boundaryFlag
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  11 ); // index entry for this cell, direction index 11
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 }; // 19-entry lookup applied to ref_dir when the x test succeeds
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 }; // same, for the y test
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 }; // same, for the z test
+                const Cell n = it.cell() + Cell(0, 1, 1); // n: this cell shifted by the offset tested above
+                int32_t ref_dir = 16; // start value for dir 11; remapped by every axis test below that succeeds
+                element.wnx = 0; // compute discrete normal vector of free slip wall; components start at 0
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 0, n.y(), n.z(), domainFlag ) ) // x test: offset 0, i.e. n itself
+                {
+                   element.wnx = 0; // stays 0; only ref_dir is remapped
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + -1, n.z(), domainFlag ) ) // y test: cell at n + (0, -1, 0) against domainFlag
+                {
+                   element.wny = -1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + -1, domainFlag ) ) // z test: cell at n + (0, 0, -1) against domainFlag
+                {
+                   element.wnz = -1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid): no test above produced a non-zero normal component
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 0; // fallback normal is the negated neighbor offset
+                   element.wny = -1;
+                   element.wnz = -1;
+                   ref_dir = 11; // fallback: the direction index itself, discarding any remapping above
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element ); // every entry is stored in indexVectorAll
+              if( inner.contains( it.x(), it.y(), it.z() ) ) // and additionally in exactly one of inner / outer
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) ) // only cells carrying domainFlag are processed
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, -1, 1 , 0 ), boundaryFlag ) ) // neighbor at offset (0, -1, 1) carries boundaryFlag
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  12 ); // index entry for this cell, direction index 12
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 }; // 19-entry lookup applied to ref_dir when the x test succeeds
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 }; // same, for the y test
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 }; // same, for the z test
+                const Cell n = it.cell() + Cell(0, -1, 1); // n: this cell shifted by the offset tested above
+                int32_t ref_dir = 15; // start value for dir 12; remapped by every axis test below that succeeds
+                element.wnx = 0; // compute discrete normal vector of free slip wall; components start at 0
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 0, n.y(), n.z(), domainFlag ) ) // x test: offset 0, i.e. n itself
+                {
+                   element.wnx = 0; // stays 0; only ref_dir is remapped
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 1, n.z(), domainFlag ) ) // y test: cell at n + (0, 1, 0) against domainFlag
+                {
+                   element.wny = 1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + -1, domainFlag ) ) // z test: cell at n + (0, 0, -1) against domainFlag
+                {
+                   element.wnz = -1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid): no test above produced a non-zero normal component
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 0; // fallback normal is the negated neighbor offset
+                   element.wny = 1;
+                   element.wnz = -1;
+                   ref_dir = 12; // fallback: the direction index itself, discarding any remapping above
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element ); // every entry is stored in indexVectorAll
+              if( inner.contains( it.x(), it.y(), it.z() ) ) // and additionally in exactly one of inner / outer
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) ) // only cells carrying domainFlag are processed
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 0, 1 , 0 ), boundaryFlag ) ) // neighbor at offset (-1, 0, 1) carries boundaryFlag
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  13 ); // index entry for this cell, direction index 13
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 }; // 19-entry lookup applied to ref_dir when the x test succeeds
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 }; // same, for the y test
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 }; // same, for the z test
+                const Cell n = it.cell() + Cell(-1, 0, 1); // n: this cell shifted by the offset tested above
+                int32_t ref_dir = 18; // start value for dir 13; remapped by every axis test below that succeeds
+                element.wnx = 0; // compute discrete normal vector of free slip wall; components start at 0
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 1, n.y(), n.z(), domainFlag ) ) // x test: cell at n + (1, 0, 0) against domainFlag
+                {
+                   element.wnx = 1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 0, n.z(), domainFlag ) ) // y test: offset 0, i.e. n itself
+                {
+                   element.wny = 0; // stays 0; only ref_dir is remapped
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + -1, domainFlag ) ) // z test: cell at n + (0, 0, -1) against domainFlag
+                {
+                   element.wnz = -1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid): no test above produced a non-zero normal component
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 1; // fallback normal is the negated neighbor offset
+                   element.wny = 0;
+                   element.wnz = -1;
+                   ref_dir = 13; // fallback: the direction index itself, discarding any remapping above
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element ); // every entry is stored in indexVectorAll
+              if( inner.contains( it.x(), it.y(), it.z() ) ) // and additionally in exactly one of inner / outer
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) ) // only cells carrying domainFlag are processed
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 0, 1 , 0 ), boundaryFlag ) ) // neighbor at offset (1, 0, 1) carries boundaryFlag
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  14 ); // index entry for this cell, direction index 14
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 }; // 19-entry lookup applied to ref_dir when the x test succeeds
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 }; // same, for the y test
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 }; // same, for the z test
+                const Cell n = it.cell() + Cell(1, 0, 1); // n: this cell shifted by the offset tested above
+                int32_t ref_dir = 17; // start value for dir 14; remapped by every axis test below that succeeds
+                element.wnx = 0; // compute discrete normal vector of free slip wall; components start at 0
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + -1, n.y(), n.z(), domainFlag ) ) // x test: cell at n + (-1, 0, 0) against domainFlag
+                {
+                   element.wnx = -1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 0, n.z(), domainFlag ) ) // y test: offset 0, i.e. n itself
+                {
+                   element.wny = 0; // stays 0; only ref_dir is remapped
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + -1, domainFlag ) ) // z test: cell at n + (0, 0, -1) against domainFlag
+                {
+                   element.wnz = -1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid): no test above produced a non-zero normal component
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = -1; // fallback normal is the negated neighbor offset
+                   element.wny = 0;
+                   element.wnz = -1;
+                   ref_dir = 14; // fallback: the direction index itself, discarding any remapping above
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element ); // every entry is stored in indexVectorAll
+              if( inner.contains( it.x(), it.y(), it.z() ) ) // and additionally in exactly one of inner / outer
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) ) // only cells carrying domainFlag are processed
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 1, -1 , 0 ), boundaryFlag ) ) // neighbor at offset (0, 1, -1) carries boundaryFlag
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  15 ); // index entry for this cell, direction index 15
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 }; // 19-entry lookup applied to ref_dir when the x test succeeds
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 }; // same, for the y test
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 }; // same, for the z test
+                const Cell n = it.cell() + Cell(0, 1, -1); // n: this cell shifted by the offset tested above
+                int32_t ref_dir = 12; // start value for dir 15; remapped by every axis test below that succeeds
+                element.wnx = 0; // compute discrete normal vector of free slip wall; components start at 0
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 0, n.y(), n.z(), domainFlag ) ) // x test: offset 0, i.e. n itself
+                {
+                   element.wnx = 0; // stays 0; only ref_dir is remapped
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + -1, n.z(), domainFlag ) ) // y test: cell at n + (0, -1, 0) against domainFlag
+                {
+                   element.wny = -1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 1, domainFlag ) ) // z test: cell at n + (0, 0, 1) against domainFlag
+                {
+                   element.wnz = 1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid): no test above produced a non-zero normal component
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 0; // fallback normal is the negated neighbor offset
+                   element.wny = -1;
+                   element.wnz = 1;
+                   ref_dir = 15; // fallback: the direction index itself, discarding any remapping above
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element ); // every entry is stored in indexVectorAll
+              if( inner.contains( it.x(), it.y(), it.z() ) ) // and additionally in exactly one of inner / outer
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) ) // only cells carrying domainFlag are processed
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, -1, -1 , 0 ), boundaryFlag ) ) // neighbor at offset (0, -1, -1) carries boundaryFlag
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  16 ); // index entry for this cell, direction index 16
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 }; // 19-entry lookup applied to ref_dir when the x test succeeds
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 }; // same, for the y test
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 }; // same, for the z test
+                const Cell n = it.cell() + Cell(0, -1, -1); // n: this cell shifted by the offset tested above
+                int32_t ref_dir = 11; // start value for dir 16; remapped by every axis test below that succeeds
+                element.wnx = 0; // compute discrete normal vector of free slip wall; components start at 0
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 0, n.y(), n.z(), domainFlag ) ) // x test: offset 0, i.e. n itself
+                {
+                   element.wnx = 0; // stays 0; only ref_dir is remapped
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 1, n.z(), domainFlag ) ) // y test: cell at n + (0, 1, 0) against domainFlag
+                {
+                   element.wny = 1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 1, domainFlag ) ) // z test: cell at n + (0, 0, 1) against domainFlag
+                {
+                   element.wnz = 1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid): no test above produced a non-zero normal component
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 0; // fallback normal is the negated neighbor offset
+                   element.wny = 1;
+                   element.wnz = 1;
+                   ref_dir = 16; // fallback: the direction index itself, discarding any remapping above
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element ); // every entry is stored in indexVectorAll
+              if( inner.contains( it.x(), it.y(), it.z() ) ) // and additionally in exactly one of inner / outer
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) ) // only cells carrying domainFlag are processed
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 0, -1 , 0 ), boundaryFlag ) ) // neighbor at offset (-1, 0, -1) carries boundaryFlag
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  17 ); // index entry for this cell, direction index 17
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 }; // 19-entry lookup applied to ref_dir when the x test succeeds
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 }; // same, for the y test
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 }; // same, for the z test
+                const Cell n = it.cell() + Cell(-1, 0, -1); // n: this cell shifted by the offset tested above
+                int32_t ref_dir = 14; // start value for dir 17; remapped by every axis test below that succeeds
+                element.wnx = 0; // compute discrete normal vector of free slip wall; components start at 0
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 1, n.y(), n.z(), domainFlag ) ) // x test: cell at n + (1, 0, 0) against domainFlag
+                {
+                   element.wnx = 1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 0, n.z(), domainFlag ) ) // y test: offset 0, i.e. n itself
+                {
+                   element.wny = 0; // stays 0; only ref_dir is remapped
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 1, domainFlag ) ) // z test: cell at n + (0, 0, 1) against domainFlag
+                {
+                   element.wnz = 1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid): no test above produced a non-zero normal component
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 1; // fallback normal is the negated neighbor offset
+                   element.wny = 0;
+                   element.wnz = 1;
+                   ref_dir = 17; // fallback: the direction index itself, discarding any remapping above
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element ); // every entry is stored in indexVectorAll
+              if( inner.contains( it.x(), it.y(), it.z() ) ) // and additionally in exactly one of inner / outer
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 0, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  18 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 };
+                const Cell n = it.cell() + Cell(1, 0, -1);
+                int32_t ref_dir = 13; // dir: 18
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + -1, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = -1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 0, n.z(), domainFlag ) )
+                {
+                   element.wny = 0;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 1, domainFlag ) )
+                {
+                   element.wnz = 1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = -1;
+                   element.wny = 0;
+                   element.wnz = 1;
+                   ref_dir = 18;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        
+        
+
+        indexVectors->syncGPU();
+    }
+
+private:
+    void run_impl(IBlock * block, IndexVectors::Type type, gpuStream_t stream = nullptr);
+
+    BlockDataID indexVectorID;
+    
+public:
+    BlockDataID pdfsID;
+};
+
+
+
+} // namespace lbm
+} // namespace walberla
\ No newline at end of file
diff --git a/apps/benchmarks/GridRefSphere/NoSlip.cu b/apps/benchmarks/GridRefSphere/NoSlip.cu
new file mode 100644
index 000000000..10dd9b3e2
--- /dev/null
+++ b/apps/benchmarks/GridRefSphere/NoSlip.cu
@@ -0,0 +1,135 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file NoSlip.cu
+//! \author pystencils
+//======================================================================================================================
+
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+#include "NoSlip.h"
+#include "gpu/ErrorChecking.h"
+
+
+#define FUNC_PREFIX __global__
+
+using namespace std;
+
+namespace walberla {
+namespace lbm {
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 177
+#else
+#pragma diag_suppress 177
+#endif
+#endif
+//NOLINTBEGIN(readability-non-const-parameter*)
+namespace internal_noslip_even {
+// NoSlip kernel: thread i copies the PDF (x, y, z, dir) of index-list entry i into slot f_in_inv_dir_idx[dir] of the cell shifted by the f_in_inv_offsets_* tables.
+static FUNC_PREFIX __launch_bounds__(256) void noslip_even(uint8_t * RESTRICT const _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
+{
+   // Lookup tables indexed by the stored direction: PDF slot and cell offset of the write target.
+   const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13 };
+   const int32_t f_in_inv_offsets_x [] = { 0,0,0,-1,1,0,0,-1,1,-1,1,0,0,-1,1,0,0,-1,1 };
+   const int32_t f_in_inv_offsets_y [] = { 0,1,-1,0,0,0,0,1,1,-1,-1,1,-1,0,0,1,-1,0,0 };
+   const int32_t f_in_inv_offsets_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1 };
+
+   // Global thread index in 64 bit, so the byte offset 16*entry below cannot wrap for very long lists.
+   const int64_t entry = int64_t(blockDim.x)*int64_t(blockIdx.x) + int64_t(threadIdx.x);
+   if (entry >= int64_t(indexVectorSize))
+      return; // surplus threads of the last thread block have no entry to process
+   // An entry occupies 16 bytes: int32 x, y, z, dir at byte offsets 0, 4, 8 and 12.
+   const int32_t * const record = reinterpret_cast<const int32_t *>(_data_indexVector + 16*entry);
+   const int64_t x = record[0];
+   const int64_t y = record[1];
+   const int64_t z = record[2];
+   const int32_t dir = record[3];
+   const double * const src = _data_pdfs + _stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir;
+   double * const dst = _data_pdfs + _stride_pdfs_0*(x + f_in_inv_offsets_x[dir]) + _stride_pdfs_1*(y + f_in_inv_offsets_y[dir]) + _stride_pdfs_2*(z + f_in_inv_offsets_z[dir]) + _stride_pdfs_3*f_in_inv_dir_idx[dir];
+   *dst = *src;
+}
+}
+
+//NOLINTEND(readability-non-const-parameter*)
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __CUDACC__
+#pragma pop
+#endif
+
+
+void NoSlip::run_impl(IBlock * block, IndexVectors::Type type, gpuStream_t stream)
+{
+   // Fetch the block's index lists; an empty list of the requested type means no kernel launch.
+   auto * indexVectors = block->getData<IndexVectors>(indexVectorID);
+   const int32_t indexVectorSize = int32_c( indexVectors->indexVector(type).size() );
+   if( indexVectorSize == 0)
+      return;
+
+   // The kernel addresses the device copy of the list as raw bytes.
+   uint8_t * _data_indexVector = reinterpret_cast<uint8_t*>(indexVectors->pointerGpu(type));
+
+   auto pdfs = block->getData< gpu::GPUField<double> >(pdfsID);
+   const uint8_t timestep = pdfs->getTimestep();
+
+   WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()))
+   double * RESTRICT  _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+
+   // One thread per entry: at most 256 threads per block, block count rounded up.
+   const int32_t threadsPerBlock = (256 < indexVectorSize) ? 256 : indexVectorSize;
+   const int64_t blockCount = (int64_t(indexVectorSize) + threadsPerBlock - 1) / threadsPerBlock;
+   const dim3 _block(uint32_c(threadsPerBlock), uint32_c(1), uint32_c(1));
+   const dim3 _grid(uint32_c(blockCount), uint32_c(1), uint32_c(1));
+   if(((timestep & 1) ^ 1)) { // lowest bit of the timestep counter is 0
+      internal_noslip_even::noslip_even<<<_grid, _block, 0, stream>>>(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, indexVectorSize);
+   } else { // lowest bit is 1: the very same kernel is launched
+      internal_noslip_even::noslip_even<<<_grid, _block, 0, stream>>>(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, indexVectorSize);
+   }
+}
+
+// Applies the boundary kernel to every entry of the block's ALL index list.
+void NoSlip::run(IBlock * block, gpuStream_t stream) {
+   this->run_impl(block, IndexVectors::ALL, stream);
+}
+
+// Applies the boundary kernel to the entries of the block's INNER index list only.
+void NoSlip::inner(IBlock * block, gpuStream_t stream) {
+   this->run_impl(block, IndexVectors::INNER, stream);
+}
+
+// Applies the boundary kernel to the entries of the block's OUTER index list only.
+void NoSlip::outer(IBlock * block, gpuStream_t stream) {
+   this->run_impl(block, IndexVectors::OUTER, stream);
+}
+
+} // namespace lbm
+} // namespace walberla
+
diff --git a/apps/benchmarks/GridRefSphere/NoSlip.h b/apps/benchmarks/GridRefSphere/NoSlip.h
new file mode 100644
index 000000000..2fc822e23
--- /dev/null
+++ b/apps/benchmarks/GridRefSphere/NoSlip.h
@@ -0,0 +1,527 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file NoSlip.h
+//! \author pystencils
+//======================================================================================================================
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "gpu/FieldCopy.h"
+#include "gpu/GPUField.h"
+#include "gpu/GPUWrapper.h"
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "blockforest/StructuredBlockForest.h"
+#include "field/FlagField.h"
+#include "core/debug/Debug.h"
+
+#include <set>
+#include <vector>
+
+
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace lbm {
+
+
+class NoSlip
+{
+public:
+    // One boundary link: a cell (x, y, z) together with a direction index, stored as four consecutive int32 values.
+    struct IndexInfo {
+        int32_t x;
+        int32_t y;
+        int32_t z;
+        int32_t dir;
+        IndexInfo(int32_t x_, int32_t y_, int32_t z_, int32_t dir_) : x(x_), y(y_), z(z_), dir(dir_) {}
+        // Member-wise equality over all four fields.
+        bool operator==(const IndexInfo & rhs) const { return dir == rhs.dir && z == rhs.z && y == rhs.y && x == rhs.x; }
+    };
+
+
+
+    // Host-side index lists (ALL / INNER / OUTER) of one block together with their device copies.
+    class IndexVectors
+    {
+    public:
+        using CpuIndexVector = std::vector<IndexInfo>;
+        enum Type {
+            ALL = 0,
+            INNER = 1,
+            OUTER = 2,
+            NUM_TYPES = 3
+        };
+
+        IndexVectors() = default;
+        bool operator==(IndexVectors const &other) const { return other.cpuVectors_ == cpuVectors_; }
+
+        // NOTE(review): the class stays copyable although the destructor frees the device buffers; a copy would free them twice.
+        ~IndexVectors() {
+            for( auto & gpuVec: gpuVectors_)
+               WALBERLA_GPU_CHECK(gpuFree( gpuVec ));
+        }
+        CpuIndexVector & indexVector(Type t) { return cpuVectors_[t]; }
+        IndexInfo * pointerCpu(Type t)  { return cpuVectors_[t].data(); }
+        IndexInfo * pointerGpu(Type t)  { return gpuVectors_[t]; } // gpuVectors_ is empty until syncGPU() has run
+
+        // Releases the previous device buffers and uploads the current contents of every host list.
+        void syncGPU()
+        {
+            for( auto & gpuVec: gpuVectors_)
+               WALBERLA_GPU_CHECK(gpuFree( gpuVec ));
+            gpuVectors_.resize( cpuVectors_.size() );
+            WALBERLA_ASSERT_EQUAL(cpuVectors_.size(), NUM_TYPES);
+            for(size_t i=0; i < cpuVectors_.size(); ++i )
+            {
+                auto & gpuVec = gpuVectors_[i];
+                const auto & cpuVec = cpuVectors_[i];
+                WALBERLA_GPU_CHECK(gpuMalloc( &gpuVec, sizeof(IndexInfo) * cpuVec.size() ));
+                // data() is well defined for an empty list, whereas &cpuVec[0] is not
+                WALBERLA_GPU_CHECK(gpuMemcpy( gpuVec, cpuVec.data(), sizeof(IndexInfo) * cpuVec.size(), gpuMemcpyHostToDevice ));
+            }
+        }
+    private:
+        std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
+        using GpuIndexVector = IndexInfo *;
+        std::vector<GpuIndexVector> gpuVectors_;
+    };
+
+    // Stores the PDF field id and registers one default-constructed IndexVectors object per block as "IndexField_NoSlip".
+    NoSlip( const shared_ptr<StructuredBlockForest> & blocks, BlockDataID pdfsID_)
+        : pdfsID(pdfsID_)
+    {
+        auto makeEmptyIndexVectors = []( IBlock * const , StructuredBlockStorage * const ) { return new IndexVectors(); };
+        indexVectorID = blocks->addStructuredBlockData< IndexVectors >( makeEmptyIndexVectors, "IndexField_NoSlip");
+    }
+
+    void run (IBlock * block, gpuStream_t stream = nullptr); // processes the block's ALL index list on the given stream
+
+    // Function-call syntax for run(): same arguments, same effect.
+    void operator() (IBlock * block, gpuStream_t stream = nullptr) {
+        this->run(block, stream);
+    }
+
+    void inner (IBlock * block, gpuStream_t stream = nullptr); // processes the block's INNER index list on the given stream
+
+    void outer (IBlock * block, gpuStream_t stream = nullptr); // processes the block's OUTER index list on the given stream
+
+    // Returns a callable that invokes run() on a block using the given stream.
+    // The closure captures `this`, so this NoSlip object must outlive the returned function.
+    std::function<void (IBlock *)> getSweep(gpuStream_t stream = nullptr)
+    {
+        return [this, stream](IBlock * blk) { this->run(blk, stream); };
+    }
+
+    // Returns a callable that invokes inner() on a block using the given stream.
+    // The closure captures `this`, so this NoSlip object must outlive the returned function.
+    std::function<void (IBlock *)> getInnerSweep(gpuStream_t stream = nullptr)
+    {
+        return [this, stream](IBlock * blk) { this->inner(blk, stream); };
+    }
+
+    // Returns a callable that invokes outer() on a block using the given stream.
+    // The closure captures `this`, so this NoSlip object must outlive the returned function.
+    std::function<void (IBlock *)> getOuterSweep(gpuStream_t stream = nullptr)
+    {
+        return [this, stream](IBlock * blk) { this->outer(blk, stream); };
+    }
+
+    // Rebuilds the index lists of every block in `blocks` by calling the per-block overload on each of them.
+    template<typename FlagField_T>
+    void fillFromFlagField( const shared_ptr<StructuredBlockForest> & blocks, ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID, FlagUID domainFlagUID)
+    {
+        for( auto & currentBlock : *blocks )
+            fillFromFlagField<FlagField_T>(&currentBlock, flagFieldID, boundaryFlagUID, domainFlagUID );
+    }
+
+
+    template<typename FlagField_T> // Rebuilds the ALL / INNER / OUTER index lists of one block from its flag field and uploads them.
+    void fillFromFlagField(IBlock * block, ConstBlockDataID flagFieldID,
+                            FlagUID boundaryFlagUID, FlagUID domainFlagUID )
+    {
+        auto * indexVectors = block->getData< IndexVectors > ( indexVectorID );
+        auto & indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+        auto & indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+        auto & indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+        auto * flagField = block->getData< FlagField_T > ( flagFieldID );
+        // If either flag is not registered on this flag field, return early: the existing lists are
+        // left untouched and nothing is uploaded.
+        if( !(flagField->flagExists(boundaryFlagUID) && flagField->flagExists(domainFlagUID) ))
+            return;
+
+        auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
+        auto domainFlag = flagField->getFlag(domainFlagUID);
+        // Entries whose cell lies inside xyzSize() expanded by -1 go to INNER, all others to OUTER.
+        auto inner = flagField->xyzSize();
+        inner.expand( cell_idx_t(-1) );
+
+        indexVectorAll.clear();
+        indexVectorInner.clear();
+        indexVectorOuter.clear();
+        // One full iterator pass per direction; each pass appends its hits in iteration order.
+        // Direction 0, offset (0, 0, 0): matches only cells carrying both the domain and the boundary flag.
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 0, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  0 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        // Direction 1: domain cells whose neighbor at offset (0, 1, 0) carries the boundary flag.
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  1 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        // Direction 2: domain cells whose neighbor at offset (0, -1, 0) carries the boundary flag.
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, -1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  2 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        // Direction 3: domain cells whose neighbor at offset (-1, 0, 0) carries the boundary flag.
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 0, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  3 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        // Direction 4: domain cells whose neighbor at offset (1, 0, 0) carries the boundary flag.
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 0, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  4 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        // Direction 5: domain cells whose neighbor at offset (0, 0, 1) carries the boundary flag.
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 0, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  5 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        // Direction 6: domain cells whose neighbor at offset (0, 0, -1) carries the boundary flag.
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 0, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  6 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        // Direction 7: domain cells whose neighbor at offset (-1, 1, 0) carries the boundary flag.
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  7 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        // Direction 8: domain cells whose neighbor at offset (1, 1, 0) carries the boundary flag.
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  8 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        // Direction 9: domain cells whose neighbor at offset (-1, -1, 0) carries the boundary flag.
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, -1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  9 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        // Direction 10: domain cells whose neighbor at offset (1, -1, 0) carries the boundary flag.
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, -1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  10 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        // Direction 11: domain cells whose neighbor at offset (0, 1, 1) carries the boundary flag.
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 1, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  11 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        // Direction 12: domain cells whose neighbor at offset (0, -1, 1) carries the boundary flag.
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, -1, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  12 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        // Direction 13: domain cells whose neighbor at offset (-1, 0, 1) carries the boundary flag.
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 0, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  13 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        // Direction 14: domain cells whose neighbor at offset (1, 0, 1) carries the boundary flag.
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 0, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  14 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        // Direction 15: domain cells whose neighbor at offset (0, 1, -1) carries the boundary flag.
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 1, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  15 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        // Direction 16: domain cells whose neighbor at offset (0, -1, -1) carries the boundary flag.
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, -1, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  16 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        // Direction 17: domain cells whose neighbor at offset (-1, 0, -1) carries the boundary flag.
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 0, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  17 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        // Direction 18: domain cells whose neighbor at offset (1, 0, -1) carries the boundary flag.
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 0, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  18 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        
+        
+        // Copy the three rebuilt host lists to the device.
+        indexVectors->syncGPU();
+    }
+
+private:
+    void run_impl(IBlock * block, IndexVectors::Type type, gpuStream_t stream = nullptr); // shared implementation behind run() / inner() / outer(); `type` selects the index list
+
+    BlockDataID indexVectorID;
+    
+public:
+    BlockDataID pdfsID;
+};
+
+
+
+} // namespace lbm
+} // namespace walberla
\ No newline at end of file
diff --git a/apps/benchmarks/GridRefSphere/Outflow.cu b/apps/benchmarks/GridRefSphere/Outflow.cu
new file mode 100644
index 000000000..d64872112
--- /dev/null
+++ b/apps/benchmarks/GridRefSphere/Outflow.cu
@@ -0,0 +1,147 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file Outflow.cu
+//! \author pystencils
+//======================================================================================================================
+
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+#include "Outflow.h"
+#include "gpu/ErrorChecking.h"
+
+
+#define FUNC_PREFIX __global__
+
+using namespace std;
+
+namespace walberla {
+namespace lbm {
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 177
+#else
+#pragma diag_suppress 177
+#endif
+#endif
+//NOLINTBEGIN(readability-non-const-parameter*)
+namespace internal_outflow_even {
+static FUNC_PREFIX __launch_bounds__(256) void outflow_even(uint8_t * RESTRICT  _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
+{
+   // Outflow boundary kernel: each thread processes one index-vector entry. Tables below are indexed by the entry's `dir`.
+   const int32_t f_out_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13 }; 
+   const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13 }; 
+   const int32_t f_in_inv_offsets_x [] = { 0,0,0,-1,1,0,0,-1,1,-1,1,0,0,-1,1,0,0,-1,1 }; 
+   const int32_t f_in_inv_offsets_y [] = { 0,1,-1,0,0,0,0,1,1,-1,-1,1,-1,0,0,1,-1,0,0 }; 
+   const int32_t f_in_inv_offsets_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1 }; 
+   // Entry layout as read below (32-byte stride): int32 x, y, z, dir at byte offsets 0/4/8/12,
+   // followed by two doubles at byte offsets 16 and 24 (cf. Outflow::IndexInfo: pdf, pdf_nd).
+   // The neighbour_offset_* tables hold the same values as the f_in_inv_offsets_* tables above.
+   const int32_t neighbour_offset_x [] = { 0,0,0,-1,1,0,0,-1,1,-1,1,0,0,-1,1,0,0,-1,1 }; 
+   const int32_t neighbour_offset_y [] = { 0,1,-1,0,0,0,0,1,1,-1,-1,1,-1,0,0,1,-1,0,0 }; 
+   const int32_t neighbour_offset_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1 }; 
+   // Guard: threads whose global index is not below indexVectorSize do nothing.
+   if (blockDim.x*blockIdx.x + threadIdx.x < indexVectorSize)
+   {
+      uint8_t * RESTRICT  _data_indexVector_10 = _data_indexVector;
+      const int64_t x = *((int32_t * )(& _data_indexVector_10[32*blockDim.x*blockIdx.x + 32*threadIdx.x]));
+      uint8_t * RESTRICT  _data_indexVector_14 = _data_indexVector + 4;
+      const int64_t y = *((int32_t * )(& _data_indexVector_14[32*blockDim.x*blockIdx.x + 32*threadIdx.x]));
+      uint8_t * RESTRICT  _data_indexVector_18 = _data_indexVector + 8;
+      const int64_t z = *((int32_t * )(& _data_indexVector_18[32*blockDim.x*blockIdx.x + 32*threadIdx.x]));
+      uint8_t * RESTRICT  _data_indexVector_112 = _data_indexVector + 12;
+      const int32_t dir = *((int32_t * )(& _data_indexVector_112[32*blockDim.x*blockIdx.x + 32*threadIdx.x]));
+      uint8_t * RESTRICT  _data_indexVector_124 = _data_indexVector + 24;  // second stored double (byte offset 24)
+      uint8_t * RESTRICT  _data_indexVector_116 = _data_indexVector + 16;  // first stored double (byte offset 16)
+      const double pdf_inter = 0.42264973081037427**((double * )(& _data_indexVector_124[32*blockDim.x*blockIdx.x + 32*threadIdx.x])) + 0.57735026918962573**((double * )(& _data_indexVector_116[32*blockDim.x*blockIdx.x + 32*threadIdx.x]));  // weighted mix of the two stored values; the weights sum to 1
+      double * RESTRICT  _data_pdfs700fc22ba4e33a75 = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir];
+      _data_pdfs700fc22ba4e33a75[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir]] = pdf_inter;  // write the mixed value into the pdf field at the offset cell
+      double * RESTRICT  _data_pdfsfefb6996b6974314 = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_1*neighbour_offset_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*neighbour_offset_z[dir] + _stride_pdfs_3*f_out_inv_dir_idx[dir];
+      *((double * )(& _data_indexVector_116[32*blockDim.x*blockIdx.x + 32*threadIdx.x])) = _data_pdfsfefb6996b6974314[_stride_pdfs_0*x + _stride_pdfs_0*(neighbour_offset_x[dir] - 1)];  // refresh the first stored value from the field, one cell further in -x
+      *((double * )(& _data_indexVector_124[32*blockDim.x*blockIdx.x + 32*threadIdx.x])) = pdf_inter;  // keep the mixed value in the entry for the next invocation
+   } 
+}
+}
+
+//NOLINTEND(readability-non-const-parameter*)
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __CUDACC__
+#pragma pop
+#endif
+
+
+void Outflow::run_impl(IBlock * block, IndexVectors::Type type, gpuStream_t stream)
+{  // Launches the outflow kernel on `stream` for the index vector of the requested type; no-op if that vector is empty.
+   auto * indexVectors = block->getData<IndexVectors>(indexVectorID);
+   int32_t indexVectorSize = int32_c( indexVectors->indexVector(type).size() );
+   if( indexVectorSize == 0)
+      return;
+   // Device-side copy of the index vector (the host-side size above determines the launch size).
+   auto pointer = indexVectors->pointerGpu(type);
+   // The kernel addresses the entries byte-wise, hence the cast to uint8_t*.
+
+   uint8_t * _data_indexVector = reinterpret_cast<uint8_t*>(pointer);
+   // GPU pdf field the kernel reads from and writes to.
+   auto pdfs = block->getData< gpu::GPUField<double> >(pdfsID);
+   // The parity of the field's timestep selects the branch below.
+   uint8_t timestep = pdfs->getTimestep();
+   // NOTE(review): the assertion below has no trailing ';' - presumably the macro expands to a complete statement; confirm.
+   WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()))
+    double * RESTRICT  _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    if(((timestep & 1) ^ 1)) {  // taken when timestep is even
+        dim3 _block(uint32_c(((256 < indexVectorSize) ? 256 : indexVectorSize)), uint32_c(1), uint32_c(1));  // threads per block: min(256, indexVectorSize)
+        dim3 _grid(uint32_c(( (indexVectorSize) % (((256 < indexVectorSize) ? 256 : indexVectorSize)) == 0 ? (int64_t)(indexVectorSize) / (int64_t)(((256 < indexVectorSize) ? 256 : indexVectorSize)) : ( (int64_t)(indexVectorSize) / (int64_t)(((256 < indexVectorSize) ? 256 : indexVectorSize)) ) +1 )), uint32_c(1), uint32_c(1));  // blocks: indexVectorSize / threads per block, rounded up
+        internal_outflow_even::outflow_even<<<_grid, _block, 0, stream>>>(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, indexVectorSize);
+    } else {  // odd timestep; NOTE(review): launches the same outflow_even kernel with the same configuration as the even branch
+        dim3 _block(uint32_c(((256 < indexVectorSize) ? 256 : indexVectorSize)), uint32_c(1), uint32_c(1));
+        dim3 _grid(uint32_c(( (indexVectorSize) % (((256 < indexVectorSize) ? 256 : indexVectorSize)) == 0 ? (int64_t)(indexVectorSize) / (int64_t)(((256 < indexVectorSize) ? 256 : indexVectorSize)) : ( (int64_t)(indexVectorSize) / (int64_t)(((256 < indexVectorSize) ? 256 : indexVectorSize)) ) +1 )), uint32_c(1), uint32_c(1));
+        internal_outflow_even::outflow_even<<<_grid, _block, 0, stream>>>(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, indexVectorSize);
+    }
+}
+
+void Outflow::run(IBlock * block, gpuStream_t stream)
+{  // Applies the boundary using the IndexVectors::ALL list of `block`.
+   run_impl(block, IndexVectors::ALL, stream);
+}
+
+void Outflow::inner(IBlock * block, gpuStream_t stream)
+{  // Applies the boundary using only the IndexVectors::INNER list of `block`.
+   run_impl(block, IndexVectors::INNER, stream);
+}
+
+void Outflow::outer(IBlock * block, gpuStream_t stream)
+{  // Applies the boundary using only the IndexVectors::OUTER list of `block`.
+   run_impl(block, IndexVectors::OUTER, stream);
+}
+
+} // namespace lbm
+} // namespace walberla
+
diff --git a/apps/benchmarks/GridRefSphere/Outflow.h b/apps/benchmarks/GridRefSphere/Outflow.h
new file mode 100644
index 000000000..e5e3118e4
--- /dev/null
+++ b/apps/benchmarks/GridRefSphere/Outflow.h
@@ -0,0 +1,296 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file Outflow.h
+//! \author pystencils
+//======================================================================================================================
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "gpu/FieldCopy.h"
+#include "gpu/GPUField.h"
+#include "gpu/GPUWrapper.h"
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "blockforest/StructuredBlockForest.h"
+#include "field/FlagField.h"
+#include "core/debug/Debug.h"
+
+#include <set>
+#include <vector>
+
+
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace lbm {
+
+
+class Outflow
+{
+public:
+    struct IndexInfo {  // One boundary link. The GPU kernel reads this record byte-wise with a 32-byte stride, so the field order matters.
+        int32_t x;  // cell coordinates, taken from the flag-field iterator in fillFromFlagField
+        int32_t y;
+        int32_t z;
+        int32_t dir;  // direction index assigned in fillFromFlagField (4, 8, 10, 14 or 18)
+        double pdf;  // seeded from the CPU pdf field in fillFromFlagField
+        double pdf_nd;  // seeded with the same value as pdf
+        IndexInfo(int32_t x_, int32_t y_, int32_t z_, int32_t dir_) : x(x_), y(y_), z(z_), dir(dir_), pdf(), pdf_nd() {}  // pdf and pdf_nd are value-initialized (0.0)
+        bool operator==(const IndexInfo & o) const {  // integers compared exactly, doubles via floatIsEqual
+            return x == o.x && y == o.y && z == o.z && dir == o.dir && floatIsEqual(pdf, o.pdf) && floatIsEqual(pdf_nd, o.pdf_nd);
+        }
+    };
+
+
+
+    class IndexVectors  // Host-side index lists (one per Type) together with their device copies.
+    {
+    public:
+        using CpuIndexVector = std::vector<IndexInfo>;
+
+        enum Type {  // values are used as indices into cpuVectors_ / gpuVectors_
+            ALL = 0,
+            INNER = 1,
+            OUTER = 2,
+            NUM_TYPES = 3
+        };
+
+        IndexVectors() = default;
+        bool operator==(IndexVectors const &other) const { return other.cpuVectors_ == cpuVectors_; }  // compares host data only
+        // NOTE(review): the destructor frees raw device pointers but copying is not disabled - a copy would free them twice; confirm nothing copies this type.
+        ~IndexVectors() {
+            for( auto & gpuVec: gpuVectors_)
+               WALBERLA_GPU_CHECK(gpuFree( gpuVec ));
+        }
+        CpuIndexVector & indexVector(Type t) { return cpuVectors_[t]; }
+        IndexInfo * pointerCpu(Type t)  { return cpuVectors_[t].data(); }
+        // Device pointer for list t; gpuVectors_ is empty until syncGPU() has been called.
+        IndexInfo * pointerGpu(Type t)  { return gpuVectors_[t]; }
+        void syncGPU()  // Frees any previous device buffers, then allocates and uploads one buffer per host vector.
+        {
+            for( auto & gpuVec: gpuVectors_)
+               WALBERLA_GPU_CHECK(gpuFree( gpuVec ));
+            gpuVectors_.resize( cpuVectors_.size() );
+
+            WALBERLA_ASSERT_EQUAL(cpuVectors_.size(), NUM_TYPES);
+            for(size_t i=0; i < cpuVectors_.size(); ++i )
+            {
+                auto & gpuVec = gpuVectors_[i];
+                auto & cpuVec = cpuVectors_[i];
+                WALBERLA_GPU_CHECK(gpuMalloc( &gpuVec, sizeof(IndexInfo) * cpuVec.size() ));
+                WALBERLA_GPU_CHECK(gpuMemcpy( gpuVec, cpuVec.data(), sizeof(IndexInfo) * cpuVec.size(), gpuMemcpyHostToDevice ));  // data() is well-defined for an empty vector, unlike &cpuVec[0]
+            }
+        }
+
+    private:
+        std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
+        // Raw device allocations, released in syncGPU() and in the destructor.
+        using GpuIndexVector = IndexInfo *;
+        std::vector<GpuIndexVector> gpuVectors_;
+    };
+
+    Outflow( const shared_ptr<StructuredBlockForest> & blocks,
+                   BlockDataID pdfsID_, BlockDataID pdfsCPUID_)  // pdfsID_: GPU pdf field used by run_impl; pdfsCPUID_: CPU pdf field read in fillFromFlagField
+        :pdfsCPUID(pdfsCPUID_), pdfsID(pdfsID_)
+    {  // Registers a factory for (initially empty) IndexVectors block data and stores the returned ID.
+        auto createIdxVector = []( IBlock * const , StructuredBlockStorage * const ) { return new IndexVectors(); };
+        indexVectorID = blocks->addStructuredBlockData< IndexVectors >( createIdxVector, "IndexField_Outflow");
+    };
+
+    void run (IBlock * block, gpuStream_t stream = nullptr);  // uses the IndexVectors::ALL list (defined in Outflow.cu)
+    // Function-call form; forwards to run().
+    void operator() (IBlock * block, gpuStream_t stream = nullptr)
+    {
+        run(block, stream);
+    }
+    // inner() / outer(): same interface as run(), restricted to the INNER / OUTER list (defined in Outflow.cu).
+    void inner (IBlock * block, gpuStream_t stream = nullptr);
+
+    void outer (IBlock * block, gpuStream_t stream = nullptr);
+    // The get*Sweep() helpers bind `stream` and return a callable taking only the block.
+    std::function<void (IBlock *)> getSweep(gpuStream_t stream = nullptr)
+    {
+        return [this, stream]
+               (IBlock * b)
+               { this->run(b, stream); };
+    }
+    // The lambdas capture `this` by pointer: the returned callable must not outlive this Outflow object.
+    std::function<void (IBlock *)> getInnerSweep(gpuStream_t stream = nullptr)
+    {
+        return [this, stream]
+               (IBlock * b)
+               { this->inner(b, stream); };
+    }
+
+    std::function<void (IBlock *)> getOuterSweep(gpuStream_t stream = nullptr)
+    {
+        return [this, stream]
+               (IBlock * b)
+               { this->outer(b, stream); };
+    }
+
+    template<typename FlagField_T>
+    void fillFromFlagField( const shared_ptr<StructuredBlockForest> & blocks, ConstBlockDataID flagFieldID,
+                            FlagUID boundaryFlagUID, FlagUID domainFlagUID)
+    {  // Convenience overload: runs the per-block fillFromFlagField on every block returned by iterating `blocks`.
+        for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+            fillFromFlagField<FlagField_T>(&*blockIt, flagFieldID, boundaryFlagUID, domainFlagUID );
+    }
+
+
+    template<typename FlagField_T>
+    void fillFromFlagField(IBlock * block, ConstBlockDataID flagFieldID,
+                            FlagUID boundaryFlagUID, FlagUID domainFlagUID )
+    {  // Rebuilds the ALL/INNER/OUTER index vectors of one block from its flag field and uploads them via syncGPU().
+        auto * indexVectors = block->getData< IndexVectors > ( indexVectorID );
+        auto & indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+        auto & indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+        auto & indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+        // Flag field to scan, and the CPU pdf field used to seed the pdf / pdf_nd members of each entry.
+        auto * flagField = block->getData< FlagField_T > ( flagFieldID );
+        auto pdfs = block->getData< field::GhostLayerField<real_t, 19> >(pdfsCPUID); 
+        // If either flag is not registered on this flag field, return without touching the index vectors.
+        if( !(flagField->flagExists(boundaryFlagUID) && flagField->flagExists(domainFlagUID) ))
+            return;
+
+        auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
+        auto domainFlag = flagField->getFlag(domainFlagUID);
+        // `inner`: xyzSize() expanded by -1 (presumably shrunk by one cell per side); cells inside go to INNER, all others to OUTER.
+        auto inner = flagField->xyzSize();
+        inner.expand( cell_idx_t(-1) );
+
+        indexVectorAll.clear();
+        indexVectorInner.clear();
+        indexVectorOuter.clear();
+
+        // Pass 1: domain cells whose neighbor(1, 0, 0, 0) carries the boundary flag -> dir 4, seeded from the same cell, f-index 3.
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 0, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  4 );
+              element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(0), 3);
+                element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(0), 3);
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        // Pass 2: neighbor(1, 1, 0, 0) -> dir 8, seeded from cell (x, y+1, z), f-index 9.
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  8 );
+              element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(0), 9);
+                element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(0), 9);
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        // Pass 3: neighbor(1, -1, 0, 0) -> dir 10, seeded from cell (x, y-1, z), f-index 7.
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, -1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  10 );
+              element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(0), 7);
+                element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(0), 7);
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        // Pass 4: neighbor(1, 0, 1, 0) -> dir 14, seeded from cell (x, y, z+1), f-index 17.
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 0, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  14 );
+              element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(1), 17);
+                element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(1), 17);
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        // Pass 5: neighbor(1, 0, -1, 0) -> dir 18, seeded from cell (x, y, z-1), f-index 13.
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 0, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  18 );
+              element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(-1), 13);
+                element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(-1), 13);
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        // All five passes use a neighbor offset with first component +1; entries are therefore grouped by dir in the vectors.
+        // Upload the rebuilt host vectors to the device.
+        
+
+        indexVectors->syncGPU();
+    }
+
+private:
+    void run_impl(IBlock * block, IndexVectors::Type type, gpuStream_t stream = nullptr);
+
+    BlockDataID indexVectorID;
+    BlockDataID pdfsCPUID;
+public:
+    BlockDataID pdfsID;
+};
+
+
+
+} // namespace lbm
+} // namespace walberla
\ No newline at end of file
diff --git a/apps/benchmarks/GridRefSphere/README b/apps/benchmarks/GridRefSphere/README
new file mode 100644
index 000000000..6fd7561ed
--- /dev/null
+++ b/apps/benchmarks/GridRefSphere/README
@@ -0,0 +1,27 @@
+#Steps followed
+1) I have generated the files
+   [-] StorageSpecification.cu   StorageSpecification.h
+   [-] SweepCollection.cu        SweepCollection.h
+   [-] NoSlip.cu                 NoSlip.h 		//Generated NoSlip BC
+   [-] Outflow.cu                Outflow.h		//Generated Outflow BC, using M. Geier approach, to be applied on the East wall of the domain.
+   [-] FreeSlip.cu               FreeSlip.h		//Generated Free-Slip BC 
+   [-] UBB.cu                    UBB.h			//Velocity-Bounce-Back BC for the inlet on the West wall of the domain.
+   [-] BoundaryCollection.h
+   using codegen. In practice, this code generation is performed via the Python script "gridRef_SPHERE.py", which follows the benchmark code walberla/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.py
+
+2) It seems that the constructor of the 'Outflow' boundary condition appearing in the class "BoundaryCollection" is called with the wrong number of arguments. In fact, the generated Outflow object requires the ID of the pdfs on both
+   CPU and GPU, while the Outflow constructor call appearing in the BoundaryCollection class passes only the ID of the GPU field.
+   Therefore, I modified the BoundaryCollection constructor in the BoundaryCollection.h file (see comments in the code).
+
+3) The application used to simulate the flow past a sphere with grid refinement is contained in the file refGrid_SPHERE.cpp. The compilation is performed using the CMakeLists.txt file in this folder.
+   Furthermore, for the compilation, all the files listed in point 1) are placed in the same folder as refGrid_SPHERE.cpp.
+
+4) The code is launched with 
+   	mpirun -np N GRID_REF_SPHERE SPHERE.prm
+   where N is the number of tasks and SPHERE.prm is the parameter file.
+   The mesh representing the sphere is found in the *.obj file sphere.obj
+
+Problems found:
+
+[+] Using this procedure, I can only run the code with a number of GPUs equal to the number of coarse blocks specified in the parameter file as "numblocks".
+[+] When using 4 GPUs (1 node) with 4 coarse root blocks, the code runs without problems. Unfortunately, the results do not seem to be correct, at least in my opinion.
diff --git a/apps/benchmarks/GridRefSphere/SPHERE.prm b/apps/benchmarks/GridRefSphere/SPHERE.prm
new file mode 100644
index 000000000..dd163c02e
--- /dev/null
+++ b/apps/benchmarks/GridRefSphere/SPHERE.prm
@@ -0,0 +1,31 @@
+Parameters
+{
+    Re              100;
+    maxPhysT        1.0;
+    uLB             0.050;
+    remainingTimeLoggerFrequency 3;
+}
+
+DomainSetup
+{
+   dx0               0.06264772;
+   dy0               0.03132386;
+   dz0               0.06264772;
+   dt0               0.00028220;
+   cellsPerBlock    < 128, 128, 128 >;
+   numblocks        < 2, 2, 1 >;
+   domainScaling    < 8.0, 4.0, 4.0 >;
+   numLevels         2;
+   meshFile         sphere.obj;
+   diameter         2.00472700;
+}
+
+Boundaries
+{
+    Border { direction W;    walldistance -1; flag UBB; }
+    Border { direction E;    walldistance -1; flag Outflow;  }
+    Border { direction N;    walldistance -1; flag FreeSlip; }
+    Border { direction S;    walldistance -1; flag FreeSlip; }
+    Border { direction T;    walldistance -1; flag FreeSlip; }
+    Border { direction B;    walldistance -1; flag FreeSlip; }
+}
diff --git a/apps/benchmarks/GridRefSphere/StorageSpecification.cu b/apps/benchmarks/GridRefSphere/StorageSpecification.cu
new file mode 100644
index 000000000..7c319e26d
--- /dev/null
+++ b/apps/benchmarks/GridRefSphere/StorageSpecification.cu
@@ -0,0 +1,5330 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file StorageSpecification.cu
+//! \author lbmpy
+//======================================================================================================================
+
+#include "StorageSpecification.h"
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic push
+#   pragma GCC diagnostic ignored "-Wfloat-equal"
+#   pragma GCC diagnostic ignored "-Wshadow"
+#   pragma GCC diagnostic ignored "-Wconversion"
+#   pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+/*************************************************************************************
+ *                                Kernel Definitions
+*************************************************************************************/
+namespace internal_storagespecification_pack_ALL {
+static FUNC_PREFIX void storagespecification_pack_ALL(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{  // One thread per cell (ctr_0, ctr_1, ctr_2): copies the 19 values at f-index 0..18 into 19 consecutive buffer slots.
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_src_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_src_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_src_2)
+   {
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;  // cell index along dimension 0
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;  // cell index along dimension 1
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;  // cell index along dimension 2
+      double * RESTRICT _data_pdfs_src_10_20_30 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2;
+      _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0] = _data_pdfs_src_10_20_30[_stride_pdfs_src_0*ctr_0];  // buffer slot = 19 * linear cell index + f-index
+      double * RESTRICT _data_pdfs_src_10_20_31 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_3;
+      _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 1] = _data_pdfs_src_10_20_31[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT _data_pdfs_src_10_20_32 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_3;
+      _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 2] = _data_pdfs_src_10_20_32[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT _data_pdfs_src_10_20_33 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 3*_stride_pdfs_src_3;
+      _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 3] = _data_pdfs_src_10_20_33[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT _data_pdfs_src_10_20_34 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 4*_stride_pdfs_src_3;
+      _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 4] = _data_pdfs_src_10_20_34[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT _data_pdfs_src_10_20_35 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 5*_stride_pdfs_src_3;
+      _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 5] = _data_pdfs_src_10_20_35[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT _data_pdfs_src_10_20_36 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 6*_stride_pdfs_src_3;
+      _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 6] = _data_pdfs_src_10_20_36[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT _data_pdfs_src_10_20_37 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3;
+      _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 7] = _data_pdfs_src_10_20_37[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT _data_pdfs_src_10_20_38 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3;
+      _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 8] = _data_pdfs_src_10_20_38[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT _data_pdfs_src_10_20_39 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3;
+      _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 9] = _data_pdfs_src_10_20_39[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT _data_pdfs_src_10_20_310 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3;
+      _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 10] = _data_pdfs_src_10_20_310[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT _data_pdfs_src_10_20_311 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3;
+      _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 11] = _data_pdfs_src_10_20_311[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT _data_pdfs_src_10_20_312 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3;
+      _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 12] = _data_pdfs_src_10_20_312[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT _data_pdfs_src_10_20_313 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3;
+      _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 13] = _data_pdfs_src_10_20_313[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT _data_pdfs_src_10_20_314 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3;
+      _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 14] = _data_pdfs_src_10_20_314[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT _data_pdfs_src_10_20_315 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3;
+      _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 15] = _data_pdfs_src_10_20_315[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT _data_pdfs_src_10_20_316 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3;
+      _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 16] = _data_pdfs_src_10_20_316[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT _data_pdfs_src_10_20_317 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3;
+      _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 17] = _data_pdfs_src_10_20_317[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT _data_pdfs_src_10_20_318 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3;
+      _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 18] = _data_pdfs_src_10_20_318[_stride_pdfs_src_0*ctr_0];
+   } 
+}
+}
+
+namespace internal_storagespecification_unpack_ALL {
+static FUNC_PREFIX void storagespecification_unpack_ALL(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_dst_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_dst_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_dst_2)
+   {
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
+      double * RESTRICT  _data_pdfs_dst_10_20_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2;
+      _data_pdfs_dst_10_20_30[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0];
+      double * RESTRICT  _data_pdfs_dst_10_20_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_31[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 1];
+      double * RESTRICT  _data_pdfs_dst_10_20_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_32[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 2];
+      double * RESTRICT  _data_pdfs_dst_10_20_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_33[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 3];
+      double * RESTRICT  _data_pdfs_dst_10_20_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_34[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 4];
+      double * RESTRICT  _data_pdfs_dst_10_20_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_35[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 5];
+      double * RESTRICT  _data_pdfs_dst_10_20_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_36[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 6];
+      double * RESTRICT  _data_pdfs_dst_10_20_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_37[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 7];
+      double * RESTRICT  _data_pdfs_dst_10_20_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 8];
+      double * RESTRICT  _data_pdfs_dst_10_20_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_39[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 9];
+      double * RESTRICT  _data_pdfs_dst_10_20_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 10];
+      double * RESTRICT  _data_pdfs_dst_10_20_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_311[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 11];
+      double * RESTRICT  _data_pdfs_dst_10_20_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_312[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 12];
+      double * RESTRICT  _data_pdfs_dst_10_20_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_313[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 13];
+      double * RESTRICT  _data_pdfs_dst_10_20_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 14];
+      double * RESTRICT  _data_pdfs_dst_10_20_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_315[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 15];
+      double * RESTRICT  _data_pdfs_dst_10_20_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_316[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 16];
+      double * RESTRICT  _data_pdfs_dst_10_20_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_317[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 17];
+      double * RESTRICT  _data_pdfs_dst_10_20_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 18];
+   } 
+}
+}
+
+namespace internal_storagespecification_localCopy_ALL { // Per-thread GPU code: copies components 0..18 of one cell from _data_pdfs_src to _data_pdfs_dst.
+static FUNC_PREFIX void storagespecification_localCopy_ALL(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{ // _size_pdfs_dst_0..2: extents of the copied index range; _stride_*_0..2: element strides per coordinate; _stride_*_3: element stride between components. NOTE(review): FUNC_PREFIX and RESTRICT are macros defined outside this chunk (presumably a kernel qualifier and a no-alias qualifier) - confirm.
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_dst_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_dst_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_dst_2)
+   { // Bounds guard: a thread whose global index lies outside the extents does nothing.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // coordinate 0 = global thread index in x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // coordinate 1 = global thread index in y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // coordinate 2 = global thread index in z
+      double * RESTRICT  _data_pdfs_dst_10_20_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2; // destination row (ctr_1, ctr_2) of component 0
+      double * RESTRICT _data_pdfs_src_10_20_30 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2; // source row (ctr_1, ctr_2) of component 0, addressed with the source strides
+      _data_pdfs_dst_10_20_30[_stride_pdfs_dst_0*ctr_0] = _data_pdfs_src_10_20_30[_stride_pdfs_src_0*ctr_0]; // copy component 0; the same three-line pattern repeats below with k*_stride_*_3 added for components k = 1..18
+      double * RESTRICT  _data_pdfs_dst_10_20_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_10_20_31 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_3;
+      _data_pdfs_dst_10_20_31[_stride_pdfs_dst_0*ctr_0] = _data_pdfs_src_10_20_31[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT  _data_pdfs_dst_10_20_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_10_20_32 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_3;
+      _data_pdfs_dst_10_20_32[_stride_pdfs_dst_0*ctr_0] = _data_pdfs_src_10_20_32[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT  _data_pdfs_dst_10_20_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_10_20_33 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 3*_stride_pdfs_src_3;
+      _data_pdfs_dst_10_20_33[_stride_pdfs_dst_0*ctr_0] = _data_pdfs_src_10_20_33[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT  _data_pdfs_dst_10_20_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_10_20_34 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 4*_stride_pdfs_src_3;
+      _data_pdfs_dst_10_20_34[_stride_pdfs_dst_0*ctr_0] = _data_pdfs_src_10_20_34[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT  _data_pdfs_dst_10_20_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_10_20_35 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 5*_stride_pdfs_src_3;
+      _data_pdfs_dst_10_20_35[_stride_pdfs_dst_0*ctr_0] = _data_pdfs_src_10_20_35[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT  _data_pdfs_dst_10_20_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_10_20_36 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 6*_stride_pdfs_src_3;
+      _data_pdfs_dst_10_20_36[_stride_pdfs_dst_0*ctr_0] = _data_pdfs_src_10_20_36[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT  _data_pdfs_dst_10_20_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_10_20_37 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3;
+      _data_pdfs_dst_10_20_37[_stride_pdfs_dst_0*ctr_0] = _data_pdfs_src_10_20_37[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT  _data_pdfs_dst_10_20_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_10_20_38 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3;
+      _data_pdfs_dst_10_20_38[_stride_pdfs_dst_0*ctr_0] = _data_pdfs_src_10_20_38[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT  _data_pdfs_dst_10_20_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_10_20_39 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3;
+      _data_pdfs_dst_10_20_39[_stride_pdfs_dst_0*ctr_0] = _data_pdfs_src_10_20_39[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT  _data_pdfs_dst_10_20_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_10_20_310 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3;
+      _data_pdfs_dst_10_20_310[_stride_pdfs_dst_0*ctr_0] = _data_pdfs_src_10_20_310[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT  _data_pdfs_dst_10_20_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_10_20_311 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3;
+      _data_pdfs_dst_10_20_311[_stride_pdfs_dst_0*ctr_0] = _data_pdfs_src_10_20_311[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT  _data_pdfs_dst_10_20_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_10_20_312 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3;
+      _data_pdfs_dst_10_20_312[_stride_pdfs_dst_0*ctr_0] = _data_pdfs_src_10_20_312[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT  _data_pdfs_dst_10_20_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_10_20_313 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3;
+      _data_pdfs_dst_10_20_313[_stride_pdfs_dst_0*ctr_0] = _data_pdfs_src_10_20_313[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT  _data_pdfs_dst_10_20_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_10_20_314 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3;
+      _data_pdfs_dst_10_20_314[_stride_pdfs_dst_0*ctr_0] = _data_pdfs_src_10_20_314[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT  _data_pdfs_dst_10_20_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_10_20_315 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3;
+      _data_pdfs_dst_10_20_315[_stride_pdfs_dst_0*ctr_0] = _data_pdfs_src_10_20_315[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT  _data_pdfs_dst_10_20_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_10_20_316 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3;
+      _data_pdfs_dst_10_20_316[_stride_pdfs_dst_0*ctr_0] = _data_pdfs_src_10_20_316[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT  _data_pdfs_dst_10_20_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_10_20_317 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3;
+      _data_pdfs_dst_10_20_317[_stride_pdfs_dst_0*ctr_0] = _data_pdfs_src_10_20_317[_stride_pdfs_src_0*ctr_0];
+      double * RESTRICT  _data_pdfs_dst_10_20_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_10_20_318 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3;
+      _data_pdfs_dst_10_20_318[_stride_pdfs_dst_0*ctr_0] = _data_pdfs_src_10_20_318[_stride_pdfs_src_0*ctr_0]; // last component (18); 19 components copied in total
+   } 
+}
+}
+
+
+namespace internal_storagespecification_pack_BN { // Per-thread GPU code: packs component 15 of each cell into a dense buffer ("BN" presumably names the lattice direction; the index-to-direction mapping is not visible here).
+static FUNC_PREFIX void storagespecification_pack_BN(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{ // _data_buffer: packed output; _data_pdfs_src: strided source field; _size_pdfs_src_0..2: extents of the packed index range; _stride_pdfs_src_3: element stride between components. FUNC_PREFIX/RESTRICT are macros defined outside this chunk.
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_src_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_src_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_src_2)
+   { // Bounds guard: a thread whose global index lies outside the extents does nothing.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // coordinate 0 = global thread index in x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // coordinate 1 = global thread index in y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // coordinate 2 = global thread index in z
+      double * RESTRICT _data_pdfs_src_10_20_315 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3; // source row (ctr_1, ctr_2) of component 15
+      _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src_10_20_315[_stride_pdfs_src_0*ctr_0]; // one value per cell; buffer index linearizes (ctr_2, ctr_1, ctr_0) with ctr_0 fastest
+   } 
+}
+}
+
+namespace internal_storagespecification_pack_NE { // Per-thread GPU code: packs component 8 of each cell into a dense buffer ("NE" presumably names the lattice direction; the index-to-direction mapping is not visible here).
+static FUNC_PREFIX void storagespecification_pack_NE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{ // _data_buffer: packed output; _data_pdfs_src: strided source field; _size_pdfs_src_0..2: extents of the packed index range; _stride_pdfs_src_3: element stride between components. FUNC_PREFIX/RESTRICT are macros defined outside this chunk.
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_src_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_src_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_src_2)
+   { // Bounds guard: a thread whose global index lies outside the extents does nothing.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // coordinate 0 = global thread index in x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // coordinate 1 = global thread index in y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // coordinate 2 = global thread index in z
+      double * RESTRICT _data_pdfs_src_10_20_38 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3; // source row (ctr_1, ctr_2) of component 8
+      _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src_10_20_38[_stride_pdfs_src_0*ctr_0]; // one value per cell; buffer index linearizes (ctr_2, ctr_1, ctr_0) with ctr_0 fastest
+   } 
+}
+}
+
+namespace internal_storagespecification_pack_TN { // Per-thread GPU code: packs component 11 of each cell into a dense buffer ("TN" presumably names the lattice direction; the index-to-direction mapping is not visible here).
+static FUNC_PREFIX void storagespecification_pack_TN(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{ // _data_buffer: packed output; _data_pdfs_src: strided source field; _size_pdfs_src_0..2: extents of the packed index range; _stride_pdfs_src_3: element stride between components. FUNC_PREFIX/RESTRICT are macros defined outside this chunk.
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_src_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_src_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_src_2)
+   { // Bounds guard: a thread whose global index lies outside the extents does nothing.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // coordinate 0 = global thread index in x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // coordinate 1 = global thread index in y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // coordinate 2 = global thread index in z
+      double * RESTRICT _data_pdfs_src_10_20_311 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3; // source row (ctr_1, ctr_2) of component 11
+      _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src_10_20_311[_stride_pdfs_src_0*ctr_0]; // one value per cell; buffer index linearizes (ctr_2, ctr_1, ctr_0) with ctr_0 fastest
+   } 
+}
+}
+
+namespace internal_storagespecification_pack_SW { // Per-thread GPU code: packs component 9 of each cell into a dense buffer ("SW" presumably names the lattice direction; the index-to-direction mapping is not visible here).
+static FUNC_PREFIX void storagespecification_pack_SW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{ // _data_buffer: packed output; _data_pdfs_src: strided source field; _size_pdfs_src_0..2: extents of the packed index range; _stride_pdfs_src_3: element stride between components. FUNC_PREFIX/RESTRICT are macros defined outside this chunk.
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_src_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_src_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_src_2)
+   { // Bounds guard: a thread whose global index lies outside the extents does nothing.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // coordinate 0 = global thread index in x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // coordinate 1 = global thread index in y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // coordinate 2 = global thread index in z
+      double * RESTRICT _data_pdfs_src_10_20_39 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3; // source row (ctr_1, ctr_2) of component 9
+      _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src_10_20_39[_stride_pdfs_src_0*ctr_0]; // one value per cell; buffer index linearizes (ctr_2, ctr_1, ctr_0) with ctr_0 fastest
+   } 
+}
+}
+
+namespace internal_storagespecification_pack_NW { // Per-thread GPU code: packs component 7 of each cell into a dense buffer ("NW" presumably names the lattice direction; the index-to-direction mapping is not visible here).
+static FUNC_PREFIX void storagespecification_pack_NW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{ // _data_buffer: packed output; _data_pdfs_src: strided source field; _size_pdfs_src_0..2: extents of the packed index range; _stride_pdfs_src_3: element stride between components. FUNC_PREFIX/RESTRICT are macros defined outside this chunk.
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_src_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_src_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_src_2)
+   { // Bounds guard: a thread whose global index lies outside the extents does nothing.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // coordinate 0 = global thread index in x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // coordinate 1 = global thread index in y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // coordinate 2 = global thread index in z
+      double * RESTRICT _data_pdfs_src_10_20_37 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3; // source row (ctr_1, ctr_2) of component 7
+      _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src_10_20_37[_stride_pdfs_src_0*ctr_0]; // one value per cell; buffer index linearizes (ctr_2, ctr_1, ctr_0) with ctr_0 fastest
+   } 
+}
+}
+
+namespace internal_storagespecification_pack_BS { // Per-thread GPU code: packs component 16 of each cell into a dense buffer ("BS" presumably names the lattice direction; the index-to-direction mapping is not visible here).
+static FUNC_PREFIX void storagespecification_pack_BS(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{ // _data_buffer: packed output; _data_pdfs_src: strided source field; _size_pdfs_src_0..2: extents of the packed index range; _stride_pdfs_src_3: element stride between components. FUNC_PREFIX/RESTRICT are macros defined outside this chunk.
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_src_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_src_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_src_2)
+   { // Bounds guard: a thread whose global index lies outside the extents does nothing.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // coordinate 0 = global thread index in x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // coordinate 1 = global thread index in y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // coordinate 2 = global thread index in z
+      double * RESTRICT _data_pdfs_src_10_20_316 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3; // source row (ctr_1, ctr_2) of component 16
+      _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src_10_20_316[_stride_pdfs_src_0*ctr_0]; // one value per cell; buffer index linearizes (ctr_2, ctr_1, ctr_0) with ctr_0 fastest
+   } 
+}
+}
+
+namespace internal_storagespecification_pack_BW { // Per-thread GPU code: packs component 17 of each cell into a dense buffer ("BW" presumably names the lattice direction; the index-to-direction mapping is not visible here).
+static FUNC_PREFIX void storagespecification_pack_BW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{ // _data_buffer: packed output; _data_pdfs_src: strided source field; _size_pdfs_src_0..2: extents of the packed index range; _stride_pdfs_src_3: element stride between components. FUNC_PREFIX/RESTRICT are macros defined outside this chunk.
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_src_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_src_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_src_2)
+   { // Bounds guard: a thread whose global index lies outside the extents does nothing.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // coordinate 0 = global thread index in x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // coordinate 1 = global thread index in y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // coordinate 2 = global thread index in z
+      double * RESTRICT _data_pdfs_src_10_20_317 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3; // source row (ctr_1, ctr_2) of component 17
+      _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src_10_20_317[_stride_pdfs_src_0*ctr_0]; // one value per cell; buffer index linearizes (ctr_2, ctr_1, ctr_0) with ctr_0 fastest
+   } 
+}
+}
+
+namespace internal_storagespecification_pack_N { // Per-thread GPU code: packs components 1, 7, 8, 11 and 15 of each cell into a buffer, five consecutive values per cell ("N" presumably names the lattice direction; the mapping is not visible here).
+static FUNC_PREFIX void storagespecification_pack_N(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{ // _data_buffer: packed output; _data_pdfs_src: strided source field; _size_pdfs_src_0..2: extents of the packed index range; _stride_pdfs_src_3: element stride between components. FUNC_PREFIX/RESTRICT are macros defined outside this chunk.
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_src_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_src_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_src_2)
+   { // Bounds guard: a thread whose global index lies outside the extents does nothing.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // coordinate 0 = global thread index in x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // coordinate 1 = global thread index in y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // coordinate 2 = global thread index in z
+      double * RESTRICT _data_pdfs_src_10_20_31 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_3; // source row (ctr_1, ctr_2) of component 1
+      _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0] = _data_pdfs_src_10_20_31[_stride_pdfs_src_0*ctr_0]; // slot 0 of this cell's 5-value group; group base = 5 * linearized (ctr_2, ctr_1, ctr_0)
+      double * RESTRICT _data_pdfs_src_10_20_37 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3; // component 7
+      _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 1] = _data_pdfs_src_10_20_37[_stride_pdfs_src_0*ctr_0]; // slot 1
+      double * RESTRICT _data_pdfs_src_10_20_38 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3; // component 8
+      _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 2] = _data_pdfs_src_10_20_38[_stride_pdfs_src_0*ctr_0]; // slot 2
+      double * RESTRICT _data_pdfs_src_10_20_311 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3; // component 11
+      _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 3] = _data_pdfs_src_10_20_311[_stride_pdfs_src_0*ctr_0]; // slot 3
+      double * RESTRICT _data_pdfs_src_10_20_315 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3; // component 15
+      _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 4] = _data_pdfs_src_10_20_315[_stride_pdfs_src_0*ctr_0]; // slot 4
+   } 
+}
+}
+
+namespace internal_storagespecification_pack_TE { // Per-thread GPU code: packs component 14 of each cell into a dense buffer ("TE" presumably names the lattice direction; the index-to-direction mapping is not visible here).
+static FUNC_PREFIX void storagespecification_pack_TE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{ // _data_buffer: packed output; _data_pdfs_src: strided source field; _size_pdfs_src_0..2: extents of the packed index range; _stride_pdfs_src_3: element stride between components. FUNC_PREFIX/RESTRICT are macros defined outside this chunk.
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_src_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_src_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_src_2)
+   { // Bounds guard: a thread whose global index lies outside the extents does nothing.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // coordinate 0 = global thread index in x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // coordinate 1 = global thread index in y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // coordinate 2 = global thread index in z
+      double * RESTRICT _data_pdfs_src_10_20_314 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3; // source row (ctr_1, ctr_2) of component 14
+      _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src_10_20_314[_stride_pdfs_src_0*ctr_0]; // one value per cell; buffer index linearizes (ctr_2, ctr_1, ctr_0) with ctr_0 fastest
+   } 
+}
+}
+
+namespace internal_storagespecification_pack_W { // Per-thread GPU code: packs components 3, 7, 9, 13 and 17 of each cell into a buffer, five consecutive values per cell ("W" presumably names the lattice direction; the mapping is not visible here).
+static FUNC_PREFIX void storagespecification_pack_W(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{ // _data_buffer: packed output; _data_pdfs_src: strided source field; _size_pdfs_src_0..2: extents of the packed index range; _stride_pdfs_src_3: element stride between components. FUNC_PREFIX/RESTRICT are macros defined outside this chunk.
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_src_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_src_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_src_2)
+   { // Bounds guard: a thread whose global index lies outside the extents does nothing.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // coordinate 0 = global thread index in x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // coordinate 1 = global thread index in y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // coordinate 2 = global thread index in z
+      double * RESTRICT _data_pdfs_src_10_20_33 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 3*_stride_pdfs_src_3; // source row (ctr_1, ctr_2) of component 3
+      _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0] = _data_pdfs_src_10_20_33[_stride_pdfs_src_0*ctr_0]; // slot 0 of this cell's 5-value group; group base = 5 * linearized (ctr_2, ctr_1, ctr_0)
+      double * RESTRICT _data_pdfs_src_10_20_37 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3; // component 7
+      _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 1] = _data_pdfs_src_10_20_37[_stride_pdfs_src_0*ctr_0]; // slot 1
+      double * RESTRICT _data_pdfs_src_10_20_39 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3; // component 9
+      _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 2] = _data_pdfs_src_10_20_39[_stride_pdfs_src_0*ctr_0]; // slot 2
+      double * RESTRICT _data_pdfs_src_10_20_313 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3; // component 13
+      _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 3] = _data_pdfs_src_10_20_313[_stride_pdfs_src_0*ctr_0]; // slot 3
+      double * RESTRICT _data_pdfs_src_10_20_317 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3; // component 17
+      _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 4] = _data_pdfs_src_10_20_317[_stride_pdfs_src_0*ctr_0]; // slot 4
+   } 
+}
+}
+
+namespace internal_storagespecification_pack_E { // Per-thread GPU code: packs components 4, 8, 10, 14 and 18 of each cell into a buffer, five consecutive values per cell ("E" presumably names the lattice direction; the mapping is not visible here).
+static FUNC_PREFIX void storagespecification_pack_E(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{ // _data_buffer: packed output; _data_pdfs_src: strided source field; _size_pdfs_src_0..2: extents of the packed index range; _stride_pdfs_src_3: element stride between components. FUNC_PREFIX/RESTRICT are macros defined outside this chunk.
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_src_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_src_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_src_2)
+   { // Bounds guard: a thread whose global index lies outside the extents does nothing.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // coordinate 0 = global thread index in x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // coordinate 1 = global thread index in y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // coordinate 2 = global thread index in z
+      double * RESTRICT _data_pdfs_src_10_20_34 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 4*_stride_pdfs_src_3; // source row (ctr_1, ctr_2) of component 4
+      _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0] = _data_pdfs_src_10_20_34[_stride_pdfs_src_0*ctr_0]; // slot 0 of this cell's 5-value group; group base = 5 * linearized (ctr_2, ctr_1, ctr_0)
+      double * RESTRICT _data_pdfs_src_10_20_38 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3; // component 8
+      _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 1] = _data_pdfs_src_10_20_38[_stride_pdfs_src_0*ctr_0]; // slot 1
+      double * RESTRICT _data_pdfs_src_10_20_310 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3; // component 10
+      _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 2] = _data_pdfs_src_10_20_310[_stride_pdfs_src_0*ctr_0]; // slot 2
+      double * RESTRICT _data_pdfs_src_10_20_314 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3; // component 14
+      _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 3] = _data_pdfs_src_10_20_314[_stride_pdfs_src_0*ctr_0]; // slot 3
+      double * RESTRICT _data_pdfs_src_10_20_318 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3; // component 18
+      _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 4] = _data_pdfs_src_10_20_318[_stride_pdfs_src_0*ctr_0]; // slot 4
+   } 
+}
+}
+
+namespace internal_storagespecification_pack_BE { // Per-thread GPU code: packs component 18 of each cell into a dense buffer ("BE" presumably names the lattice direction; the index-to-direction mapping is not visible here).
+static FUNC_PREFIX void storagespecification_pack_BE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{ // _data_buffer: packed output; _data_pdfs_src: strided source field; _size_pdfs_src_0..2: extents of the packed index range; _stride_pdfs_src_3: element stride between components. FUNC_PREFIX/RESTRICT are macros defined outside this chunk.
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_src_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_src_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_src_2)
+   { // Bounds guard: a thread whose global index lies outside the extents does nothing.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // coordinate 0 = global thread index in x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // coordinate 1 = global thread index in y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // coordinate 2 = global thread index in z
+      double * RESTRICT _data_pdfs_src_10_20_318 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3; // source row (ctr_1, ctr_2) of component 18
+      _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src_10_20_318[_stride_pdfs_src_0*ctr_0]; // one value per cell; buffer index linearizes (ctr_2, ctr_1, ctr_0) with ctr_0 fastest
+   } 
+}
+}
+
+namespace internal_storagespecification_pack_SE { // Per-thread GPU code: packs component 10 of each cell into a dense buffer ("SE" presumably names the lattice direction; the index-to-direction mapping is not visible here).
+static FUNC_PREFIX void storagespecification_pack_SE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{ // _data_buffer: packed output; _data_pdfs_src: strided source field; _size_pdfs_src_0..2: extents of the packed index range; _stride_pdfs_src_3: element stride between components. FUNC_PREFIX/RESTRICT are macros defined outside this chunk.
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_src_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_src_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_src_2)
+   { // Bounds guard: a thread whose global index lies outside the extents does nothing.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // coordinate 0 = global thread index in x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // coordinate 1 = global thread index in y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // coordinate 2 = global thread index in z
+      double * RESTRICT _data_pdfs_src_10_20_310 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3; // source row (ctr_1, ctr_2) of component 10
+      _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src_10_20_310[_stride_pdfs_src_0*ctr_0]; // one value per cell; buffer index linearizes (ctr_2, ctr_1, ctr_0) with ctr_0 fastest
+   } 
+}
+}
+
+namespace internal_storagespecification_pack_TW { // Per-thread GPU code: packs component 13 of each cell into a dense buffer ("TW" presumably names the lattice direction; the index-to-direction mapping is not visible here).
+static FUNC_PREFIX void storagespecification_pack_TW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{ // _data_buffer: packed output; _data_pdfs_src: strided source field; _size_pdfs_src_0..2: extents of the packed index range; _stride_pdfs_src_3: element stride between components. FUNC_PREFIX/RESTRICT are macros defined outside this chunk.
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_src_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_src_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_src_2)
+   { // Bounds guard: a thread whose global index lies outside the extents does nothing.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // coordinate 0 = global thread index in x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // coordinate 1 = global thread index in y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // coordinate 2 = global thread index in z
+      double * RESTRICT _data_pdfs_src_10_20_313 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3; // source row (ctr_1, ctr_2) of component 13
+      _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src_10_20_313[_stride_pdfs_src_0*ctr_0]; // one value per cell; buffer index linearizes (ctr_2, ctr_1, ctr_0) with ctr_0 fastest
+   } 
+}
+}
+
+namespace internal_storagespecification_pack_S { // Per-thread GPU code: packs components 2, 9, 10, 12 and 16 of each cell into a buffer, five consecutive values per cell ("S" presumably names the lattice direction; the mapping is not visible here).
+static FUNC_PREFIX void storagespecification_pack_S(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{ // _data_buffer: packed output; _data_pdfs_src: strided source field; _size_pdfs_src_0..2: extents of the packed index range; _stride_pdfs_src_3: element stride between components. FUNC_PREFIX/RESTRICT are macros defined outside this chunk.
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_src_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_src_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_src_2)
+   { // Bounds guard: a thread whose global index lies outside the extents does nothing.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // coordinate 0 = global thread index in x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // coordinate 1 = global thread index in y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // coordinate 2 = global thread index in z
+      double * RESTRICT _data_pdfs_src_10_20_32 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_3; // source row (ctr_1, ctr_2) of component 2
+      _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0] = _data_pdfs_src_10_20_32[_stride_pdfs_src_0*ctr_0]; // slot 0 of this cell's 5-value group; group base = 5 * linearized (ctr_2, ctr_1, ctr_0)
+      double * RESTRICT _data_pdfs_src_10_20_39 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3; // component 9
+      _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 1] = _data_pdfs_src_10_20_39[_stride_pdfs_src_0*ctr_0]; // slot 1
+      double * RESTRICT _data_pdfs_src_10_20_310 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3; // component 10
+      _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 2] = _data_pdfs_src_10_20_310[_stride_pdfs_src_0*ctr_0]; // slot 2
+      double * RESTRICT _data_pdfs_src_10_20_312 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3; // component 12
+      _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 3] = _data_pdfs_src_10_20_312[_stride_pdfs_src_0*ctr_0]; // slot 3
+      double * RESTRICT _data_pdfs_src_10_20_316 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3; // component 16
+      _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 4] = _data_pdfs_src_10_20_316[_stride_pdfs_src_0*ctr_0]; // slot 4
+   } 
+}
+}
+
+namespace internal_storagespecification_pack_TS {
+// Each thread whose (x, y, z) coordinate is within the size bounds copies the pdfs_src entry at index 12 along stride dimension 3 into _data_buffer (one double per coordinate, x fastest).
+static FUNC_PREFIX void storagespecification_pack_TS(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_src_0 || y >= _size_pdfs_src_1 || z >= _size_pdfs_src_2) { return; }
+   const int64_t bufIdx = _size_pdfs_src_0*_size_pdfs_src_1*z + _size_pdfs_src_0*y + x;
+   const int64_t cellOff = _stride_pdfs_src_0*x + _stride_pdfs_src_1*y + _stride_pdfs_src_2*z;
+   _data_buffer[bufIdx] = _data_pdfs_src[cellOff + 12*_stride_pdfs_src_3];
+}
+} // namespace internal_storagespecification_pack_TS
+
+namespace internal_storagespecification_pack_B {
+// Gathers five pdfs_src entries per in-range (x, y, z) thread coordinate into _data_buffer.
+// The entries sit at indices 6, 15, 16, 17 and 18 along stride dimension 3 and are stored
+// contiguously, five doubles per coordinate, with x varying fastest in the buffer.
+static FUNC_PREFIX void storagespecification_pack_B(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   // Threads whose coordinate lies outside the size bounds do nothing.
+   if (x >= _size_pdfs_src_0 || y >= _size_pdfs_src_1 || z >= _size_pdfs_src_2) { return; }
+   const int64_t bufIdx = 5*(_size_pdfs_src_0*_size_pdfs_src_1*z + _size_pdfs_src_0*y + x);
+   const int64_t cellOff = _stride_pdfs_src_0*x + _stride_pdfs_src_1*y + _stride_pdfs_src_2*z;
+   const int64_t qStride = _stride_pdfs_src_3;
+   _data_buffer[bufIdx] = _data_pdfs_src[cellOff + 6*qStride];
+   _data_buffer[bufIdx + 1] = _data_pdfs_src[cellOff + 15*qStride];
+   _data_buffer[bufIdx + 2] = _data_pdfs_src[cellOff + 16*qStride];
+   _data_buffer[bufIdx + 3] = _data_pdfs_src[cellOff + 17*qStride];
+   _data_buffer[bufIdx + 4] = _data_pdfs_src[cellOff + 18*qStride];
+}
+} // namespace internal_storagespecification_pack_B
+
+namespace internal_storagespecification_pack_T {
+// Gathers five pdfs_src entries per in-range (x, y, z) thread coordinate into _data_buffer.
+// The entries sit at indices 5, 11, 12, 13 and 14 along stride dimension 3 and are stored
+// contiguously, five doubles per coordinate, with x varying fastest in the buffer.
+static FUNC_PREFIX void storagespecification_pack_T(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   // Threads whose coordinate lies outside the size bounds do nothing.
+   if (x >= _size_pdfs_src_0 || y >= _size_pdfs_src_1 || z >= _size_pdfs_src_2) { return; }
+   const int64_t bufIdx = 5*(_size_pdfs_src_0*_size_pdfs_src_1*z + _size_pdfs_src_0*y + x);
+   const int64_t cellOff = _stride_pdfs_src_0*x + _stride_pdfs_src_1*y + _stride_pdfs_src_2*z;
+   const int64_t qStride = _stride_pdfs_src_3;
+   _data_buffer[bufIdx] = _data_pdfs_src[cellOff + 5*qStride];
+   _data_buffer[bufIdx + 1] = _data_pdfs_src[cellOff + 11*qStride];
+   _data_buffer[bufIdx + 2] = _data_pdfs_src[cellOff + 12*qStride];
+   _data_buffer[bufIdx + 3] = _data_pdfs_src[cellOff + 13*qStride];
+   _data_buffer[bufIdx + 4] = _data_pdfs_src[cellOff + 14*qStride];
+}
+} // namespace internal_storagespecification_pack_T
+
+namespace internal_storagespecification_unpack_NE {
+// Each thread whose (x, y, z) coordinate is within the size bounds copies one double from _data_buffer (x fastest) into the pdfs_dst entry at index 9 along stride dimension 3.
+static FUNC_PREFIX void storagespecification_unpack_NE(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   const int64_t bufIdx = _size_pdfs_dst_0*_size_pdfs_dst_1*z + _size_pdfs_dst_0*y + x;
+   const int64_t cellOff = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   _data_pdfs_dst[cellOff + 9*_stride_pdfs_dst_3] = _data_buffer[bufIdx];
+}
+} // namespace internal_storagespecification_unpack_NE
+
+namespace internal_storagespecification_unpack_TE {
+// Each thread whose (x, y, z) coordinate is within the size bounds copies one double from _data_buffer (x fastest) into the pdfs_dst entry at index 17 along stride dimension 3.
+static FUNC_PREFIX void storagespecification_unpack_TE(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   const int64_t bufIdx = _size_pdfs_dst_0*_size_pdfs_dst_1*z + _size_pdfs_dst_0*y + x;
+   const int64_t cellOff = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   _data_pdfs_dst[cellOff + 17*_stride_pdfs_dst_3] = _data_buffer[bufIdx];
+}
+} // namespace internal_storagespecification_unpack_TE
+
+namespace internal_storagespecification_unpack_BS {
+// Each thread whose (x, y, z) coordinate is within the size bounds copies one double from _data_buffer (x fastest) into the pdfs_dst entry at index 11 along stride dimension 3.
+static FUNC_PREFIX void storagespecification_unpack_BS(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   const int64_t bufIdx = _size_pdfs_dst_0*_size_pdfs_dst_1*z + _size_pdfs_dst_0*y + x;
+   const int64_t cellOff = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   _data_pdfs_dst[cellOff + 11*_stride_pdfs_dst_3] = _data_buffer[bufIdx];
+}
+} // namespace internal_storagespecification_unpack_BS
+
+namespace internal_storagespecification_unpack_B {
+// Scatters five consecutive doubles per in-range (x, y, z) thread coordinate from _data_buffer
+// (five per coordinate, x varying fastest) into pdfs_dst. The values are written to indices
+// 5, 11, 12, 13 and 14 along stride dimension 3, in that order.
+static FUNC_PREFIX void storagespecification_unpack_B(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   // Threads whose coordinate lies outside the size bounds do nothing.
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   const int64_t bufIdx = 5*(_size_pdfs_dst_0*_size_pdfs_dst_1*z + _size_pdfs_dst_0*y + x);
+   const int64_t cellOff = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   const int64_t qStride = _stride_pdfs_dst_3;
+   _data_pdfs_dst[cellOff + 5*qStride] = _data_buffer[bufIdx];
+   _data_pdfs_dst[cellOff + 11*qStride] = _data_buffer[bufIdx + 1];
+   _data_pdfs_dst[cellOff + 12*qStride] = _data_buffer[bufIdx + 2];
+   _data_pdfs_dst[cellOff + 13*qStride] = _data_buffer[bufIdx + 3];
+   _data_pdfs_dst[cellOff + 14*qStride] = _data_buffer[bufIdx + 4];
+}
+} // namespace internal_storagespecification_unpack_B
+
+namespace internal_storagespecification_unpack_SE {
+// Each thread whose (x, y, z) coordinate is within the size bounds copies one double from _data_buffer (x fastest) into the pdfs_dst entry at index 7 along stride dimension 3.
+static FUNC_PREFIX void storagespecification_unpack_SE(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   const int64_t bufIdx = _size_pdfs_dst_0*_size_pdfs_dst_1*z + _size_pdfs_dst_0*y + x;
+   const int64_t cellOff = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   _data_pdfs_dst[cellOff + 7*_stride_pdfs_dst_3] = _data_buffer[bufIdx];
+}
+} // namespace internal_storagespecification_unpack_SE
+
+namespace internal_storagespecification_unpack_T {
+// Scatters five consecutive doubles per in-range (x, y, z) thread coordinate from _data_buffer
+// (five per coordinate, x varying fastest) into pdfs_dst. The values are written to indices
+// 6, 15, 16, 17 and 18 along stride dimension 3, in that order.
+static FUNC_PREFIX void storagespecification_unpack_T(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   // Threads whose coordinate lies outside the size bounds do nothing.
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   const int64_t bufIdx = 5*(_size_pdfs_dst_0*_size_pdfs_dst_1*z + _size_pdfs_dst_0*y + x);
+   const int64_t cellOff = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   const int64_t qStride = _stride_pdfs_dst_3;
+   _data_pdfs_dst[cellOff + 6*qStride] = _data_buffer[bufIdx];
+   _data_pdfs_dst[cellOff + 15*qStride] = _data_buffer[bufIdx + 1];
+   _data_pdfs_dst[cellOff + 16*qStride] = _data_buffer[bufIdx + 2];
+   _data_pdfs_dst[cellOff + 17*qStride] = _data_buffer[bufIdx + 3];
+   _data_pdfs_dst[cellOff + 18*qStride] = _data_buffer[bufIdx + 4];
+}
+} // namespace internal_storagespecification_unpack_T
+
+namespace internal_storagespecification_unpack_TN {
+// Each thread whose (x, y, z) coordinate is within the size bounds copies one double from _data_buffer (x fastest) into the pdfs_dst entry at index 16 along stride dimension 3.
+static FUNC_PREFIX void storagespecification_unpack_TN(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   const int64_t bufIdx = _size_pdfs_dst_0*_size_pdfs_dst_1*z + _size_pdfs_dst_0*y + x;
+   const int64_t cellOff = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   _data_pdfs_dst[cellOff + 16*_stride_pdfs_dst_3] = _data_buffer[bufIdx];
+}
+} // namespace internal_storagespecification_unpack_TN
+
+namespace internal_storagespecification_unpack_BN {
+// Each thread whose (x, y, z) coordinate is within the size bounds copies one double from _data_buffer (x fastest) into the pdfs_dst entry at index 12 along stride dimension 3.
+static FUNC_PREFIX void storagespecification_unpack_BN(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   const int64_t bufIdx = _size_pdfs_dst_0*_size_pdfs_dst_1*z + _size_pdfs_dst_0*y + x;
+   const int64_t cellOff = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   _data_pdfs_dst[cellOff + 12*_stride_pdfs_dst_3] = _data_buffer[bufIdx];
+}
+} // namespace internal_storagespecification_unpack_BN
+
+namespace internal_storagespecification_unpack_TS {
+// Each thread whose (x, y, z) coordinate is within the size bounds copies one double from _data_buffer (x fastest) into the pdfs_dst entry at index 15 along stride dimension 3.
+static FUNC_PREFIX void storagespecification_unpack_TS(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   const int64_t bufIdx = _size_pdfs_dst_0*_size_pdfs_dst_1*z + _size_pdfs_dst_0*y + x;
+   const int64_t cellOff = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   _data_pdfs_dst[cellOff + 15*_stride_pdfs_dst_3] = _data_buffer[bufIdx];
+}
+} // namespace internal_storagespecification_unpack_TS
+
+namespace internal_storagespecification_unpack_SW {
+// Each thread whose (x, y, z) coordinate is within the size bounds copies one double from _data_buffer (x fastest) into the pdfs_dst entry at index 8 along stride dimension 3.
+static FUNC_PREFIX void storagespecification_unpack_SW(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   const int64_t bufIdx = _size_pdfs_dst_0*_size_pdfs_dst_1*z + _size_pdfs_dst_0*y + x;
+   const int64_t cellOff = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   _data_pdfs_dst[cellOff + 8*_stride_pdfs_dst_3] = _data_buffer[bufIdx];
+}
+} // namespace internal_storagespecification_unpack_SW
+
+namespace internal_storagespecification_unpack_BW {
+// Each thread whose (x, y, z) coordinate is within the size bounds copies one double from _data_buffer (x fastest) into the pdfs_dst entry at index 14 along stride dimension 3.
+static FUNC_PREFIX void storagespecification_unpack_BW(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   const int64_t bufIdx = _size_pdfs_dst_0*_size_pdfs_dst_1*z + _size_pdfs_dst_0*y + x;
+   const int64_t cellOff = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   _data_pdfs_dst[cellOff + 14*_stride_pdfs_dst_3] = _data_buffer[bufIdx];
+}
+} // namespace internal_storagespecification_unpack_BW
+
+namespace internal_storagespecification_unpack_NW {
+// Each thread whose (x, y, z) coordinate is within the size bounds copies one double from _data_buffer (x fastest) into the pdfs_dst entry at index 10 along stride dimension 3.
+static FUNC_PREFIX void storagespecification_unpack_NW(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   const int64_t bufIdx = _size_pdfs_dst_0*_size_pdfs_dst_1*z + _size_pdfs_dst_0*y + x;
+   const int64_t cellOff = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   _data_pdfs_dst[cellOff + 10*_stride_pdfs_dst_3] = _data_buffer[bufIdx];
+}
+} // namespace internal_storagespecification_unpack_NW
+
+namespace internal_storagespecification_unpack_E {
+// Scatters five consecutive doubles per in-range (x, y, z) thread coordinate from _data_buffer
+// (five per coordinate, x varying fastest) into pdfs_dst. The values are written to indices
+// 3, 7, 9, 13 and 17 along stride dimension 3, in that order.
+static FUNC_PREFIX void storagespecification_unpack_E(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   // Threads whose coordinate lies outside the size bounds do nothing.
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   const int64_t bufIdx = 5*(_size_pdfs_dst_0*_size_pdfs_dst_1*z + _size_pdfs_dst_0*y + x);
+   const int64_t cellOff = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   const int64_t qStride = _stride_pdfs_dst_3;
+   _data_pdfs_dst[cellOff + 3*qStride] = _data_buffer[bufIdx];
+   _data_pdfs_dst[cellOff + 7*qStride] = _data_buffer[bufIdx + 1];
+   _data_pdfs_dst[cellOff + 9*qStride] = _data_buffer[bufIdx + 2];
+   _data_pdfs_dst[cellOff + 13*qStride] = _data_buffer[bufIdx + 3];
+   _data_pdfs_dst[cellOff + 17*qStride] = _data_buffer[bufIdx + 4];
+}
+} // namespace internal_storagespecification_unpack_E
+
+namespace internal_storagespecification_unpack_TW {
+// Each thread whose (x, y, z) coordinate is within the size bounds copies one double from _data_buffer (x fastest) into the pdfs_dst entry at index 18 along stride dimension 3.
+static FUNC_PREFIX void storagespecification_unpack_TW(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   const int64_t bufIdx = _size_pdfs_dst_0*_size_pdfs_dst_1*z + _size_pdfs_dst_0*y + x;
+   const int64_t cellOff = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   _data_pdfs_dst[cellOff + 18*_stride_pdfs_dst_3] = _data_buffer[bufIdx];
+}
+} // namespace internal_storagespecification_unpack_TW
+
+namespace internal_storagespecification_unpack_S {
+// Scatters five consecutive doubles per in-range (x, y, z) thread coordinate from _data_buffer
+// (five per coordinate, x varying fastest) into pdfs_dst. The values are written to indices
+// 1, 7, 8, 11 and 15 along stride dimension 3, in that order.
+static FUNC_PREFIX void storagespecification_unpack_S(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   // Threads whose coordinate lies outside the size bounds do nothing.
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   const int64_t bufIdx = 5*(_size_pdfs_dst_0*_size_pdfs_dst_1*z + _size_pdfs_dst_0*y + x);
+   const int64_t cellOff = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   const int64_t qStride = _stride_pdfs_dst_3;
+   _data_pdfs_dst[cellOff + qStride] = _data_buffer[bufIdx];
+   _data_pdfs_dst[cellOff + 7*qStride] = _data_buffer[bufIdx + 1];
+   _data_pdfs_dst[cellOff + 8*qStride] = _data_buffer[bufIdx + 2];
+   _data_pdfs_dst[cellOff + 11*qStride] = _data_buffer[bufIdx + 3];
+   _data_pdfs_dst[cellOff + 15*qStride] = _data_buffer[bufIdx + 4];
+}
+} // namespace internal_storagespecification_unpack_S
+
+namespace internal_storagespecification_unpack_W {
+// Scatters five consecutive doubles per in-range (x, y, z) thread coordinate from _data_buffer
+// (five per coordinate, x varying fastest) into pdfs_dst. The values are written to indices
+// 4, 8, 10, 14 and 18 along stride dimension 3, in that order.
+static FUNC_PREFIX void storagespecification_unpack_W(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   // Threads whose coordinate lies outside the size bounds do nothing.
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   const int64_t bufIdx = 5*(_size_pdfs_dst_0*_size_pdfs_dst_1*z + _size_pdfs_dst_0*y + x);
+   const int64_t cellOff = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   const int64_t qStride = _stride_pdfs_dst_3;
+   _data_pdfs_dst[cellOff + 4*qStride] = _data_buffer[bufIdx];
+   _data_pdfs_dst[cellOff + 8*qStride] = _data_buffer[bufIdx + 1];
+   _data_pdfs_dst[cellOff + 10*qStride] = _data_buffer[bufIdx + 2];
+   _data_pdfs_dst[cellOff + 14*qStride] = _data_buffer[bufIdx + 3];
+   _data_pdfs_dst[cellOff + 18*qStride] = _data_buffer[bufIdx + 4];
+}
+} // namespace internal_storagespecification_unpack_W
+
+namespace internal_storagespecification_unpack_BE {
+// Each thread whose (x, y, z) coordinate is within the size bounds copies one double from _data_buffer (x fastest) into the pdfs_dst entry at index 13 along stride dimension 3.
+static FUNC_PREFIX void storagespecification_unpack_BE(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   const int64_t bufIdx = _size_pdfs_dst_0*_size_pdfs_dst_1*z + _size_pdfs_dst_0*y + x;
+   const int64_t cellOff = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   _data_pdfs_dst[cellOff + 13*_stride_pdfs_dst_3] = _data_buffer[bufIdx];
+}
+} // namespace internal_storagespecification_unpack_BE
+
+namespace internal_storagespecification_unpack_N {
+// Each thread handles the cell at its global (x, y, z) thread index and writes five consecutive
+// values of _data_buffer into components 2, 9, 10, 12 and 16 of that cell in _data_pdfs_dst.
+static FUNC_PREFIX void storagespecification_unpack_N(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t ix = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t iy = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t iz = blockDim.z*blockIdx.z + threadIdx.z;
+   // Threads outside the destination extent have nothing to write.
+   if (ix >= _size_pdfs_dst_0 || iy >= _size_pdfs_dst_1 || iz >= _size_pdfs_dst_2) { return; }
+   // Linear offset of this cell (component 0) in the destination field.
+   const int64_t cell = _stride_pdfs_dst_0*ix + _stride_pdfs_dst_1*iy + _stride_pdfs_dst_2*iz;
+   // The buffer is read with five values per cell; cells are linearised over the destination extent, x fastest.
+   const int64_t slot = 5*_size_pdfs_dst_0*_size_pdfs_dst_1*iz + 5*_size_pdfs_dst_0*iy + 5*ix;
+   _data_pdfs_dst[cell + 2*_stride_pdfs_dst_3] = _data_buffer[slot];
+   _data_pdfs_dst[cell + 9*_stride_pdfs_dst_3] = _data_buffer[slot + 1];
+   _data_pdfs_dst[cell + 10*_stride_pdfs_dst_3] = _data_buffer[slot + 2];
+   _data_pdfs_dst[cell + 12*_stride_pdfs_dst_3] = _data_buffer[slot + 3];
+   _data_pdfs_dst[cell + 16*_stride_pdfs_dst_3] = _data_buffer[slot + 4];
+}
+}
+
+namespace internal_storagespecification_localCopy_B {
+// Each thread handles the cell at its global (x, y, z) thread index and copies
+// components 6, 15, 16, 17 and 18 of that cell from _data_pdfs_src to _data_pdfs_dst.
+// Source and destination fields are addressed with their own, independent strides.
+static FUNC_PREFIX void storagespecification_localCopy_B(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   const int64_t ix = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t iy = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t iz = blockDim.z*blockIdx.z + threadIdx.z;
+   // Threads outside the destination extent have nothing to copy.
+   if (ix >= _size_pdfs_dst_0 || iy >= _size_pdfs_dst_1 || iz >= _size_pdfs_dst_2) { return; }
+
+   // Linear offsets of this cell (component 0) in the destination and the source field.
+   const int64_t dstCell = _stride_pdfs_dst_0*ix + _stride_pdfs_dst_1*iy + _stride_pdfs_dst_2*iz;
+   const int64_t srcCell = _stride_pdfs_src_0*ix + _stride_pdfs_src_1*iy + _stride_pdfs_src_2*iz;
+   const int64_t dstQ = _stride_pdfs_dst_3;
+   const int64_t srcQ = _stride_pdfs_src_3;
+
+   // One element is transferred per listed component.
+   _data_pdfs_dst[dstCell + 6*dstQ] = _data_pdfs_src[srcCell + 6*srcQ];
+   _data_pdfs_dst[dstCell + 15*dstQ] = _data_pdfs_src[srcCell + 15*srcQ];
+   _data_pdfs_dst[dstCell + 16*dstQ] = _data_pdfs_src[srcCell + 16*srcQ];
+   _data_pdfs_dst[dstCell + 17*dstQ] = _data_pdfs_src[srcCell + 17*srcQ];
+   _data_pdfs_dst[dstCell + 18*dstQ] = _data_pdfs_src[srcCell + 18*srcQ];
+}
+}
+
+namespace internal_storagespecification_localCopy_W {
+// Each thread handles the cell at its global (x, y, z) thread index and copies
+// components 3, 7, 9, 13 and 17 of that cell from _data_pdfs_src to _data_pdfs_dst.
+// Source and destination fields are addressed with their own, independent strides.
+static FUNC_PREFIX void storagespecification_localCopy_W(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   const int64_t ix = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t iy = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t iz = blockDim.z*blockIdx.z + threadIdx.z;
+   // Threads outside the destination extent have nothing to copy.
+   if (ix >= _size_pdfs_dst_0 || iy >= _size_pdfs_dst_1 || iz >= _size_pdfs_dst_2) { return; }
+
+   // Linear offsets of this cell (component 0) in the destination and the source field.
+   const int64_t dstCell = _stride_pdfs_dst_0*ix + _stride_pdfs_dst_1*iy + _stride_pdfs_dst_2*iz;
+   const int64_t srcCell = _stride_pdfs_src_0*ix + _stride_pdfs_src_1*iy + _stride_pdfs_src_2*iz;
+   const int64_t dstQ = _stride_pdfs_dst_3;
+   const int64_t srcQ = _stride_pdfs_src_3;
+
+   // One element is transferred per listed component.
+   _data_pdfs_dst[dstCell + 3*dstQ] = _data_pdfs_src[srcCell + 3*srcQ];
+   _data_pdfs_dst[dstCell + 7*dstQ] = _data_pdfs_src[srcCell + 7*srcQ];
+   _data_pdfs_dst[dstCell + 9*dstQ] = _data_pdfs_src[srcCell + 9*srcQ];
+   _data_pdfs_dst[dstCell + 13*dstQ] = _data_pdfs_src[srcCell + 13*srcQ];
+   _data_pdfs_dst[dstCell + 17*dstQ] = _data_pdfs_src[srcCell + 17*srcQ];
+}
+}
+
+namespace internal_storagespecification_localCopy_S {
+// Each thread handles the cell at its global (x, y, z) thread index and copies
+// components 2, 9, 10, 12 and 16 of that cell from _data_pdfs_src to _data_pdfs_dst.
+// Source and destination fields are addressed with their own, independent strides.
+static FUNC_PREFIX void storagespecification_localCopy_S(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   const int64_t ix = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t iy = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t iz = blockDim.z*blockIdx.z + threadIdx.z;
+   // Threads outside the destination extent have nothing to copy.
+   if (ix >= _size_pdfs_dst_0 || iy >= _size_pdfs_dst_1 || iz >= _size_pdfs_dst_2) { return; }
+
+   // Linear offsets of this cell (component 0) in the destination and the source field.
+   const int64_t dstCell = _stride_pdfs_dst_0*ix + _stride_pdfs_dst_1*iy + _stride_pdfs_dst_2*iz;
+   const int64_t srcCell = _stride_pdfs_src_0*ix + _stride_pdfs_src_1*iy + _stride_pdfs_src_2*iz;
+   const int64_t dstQ = _stride_pdfs_dst_3;
+   const int64_t srcQ = _stride_pdfs_src_3;
+
+   // One element is transferred per listed component.
+   _data_pdfs_dst[dstCell + 2*dstQ] = _data_pdfs_src[srcCell + 2*srcQ];
+   _data_pdfs_dst[dstCell + 9*dstQ] = _data_pdfs_src[srcCell + 9*srcQ];
+   _data_pdfs_dst[dstCell + 10*dstQ] = _data_pdfs_src[srcCell + 10*srcQ];
+   _data_pdfs_dst[dstCell + 12*dstQ] = _data_pdfs_src[srcCell + 12*srcQ];
+   _data_pdfs_dst[dstCell + 16*dstQ] = _data_pdfs_src[srcCell + 16*srcQ];
+}
+}
+
+namespace internal_storagespecification_localCopy_NE {
+// Each thread handles the cell at its global (x, y, z) thread index and copies component 8 of that cell from _data_pdfs_src to _data_pdfs_dst.
+static FUNC_PREFIX void storagespecification_localCopy_NE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   const int64_t ix = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t iy = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t iz = blockDim.z*blockIdx.z + threadIdx.z;
+   // Threads outside the destination extent have nothing to copy.
+   if (ix >= _size_pdfs_dst_0 || iy >= _size_pdfs_dst_1 || iz >= _size_pdfs_dst_2) { return; }
+   const int64_t dstIdx = _stride_pdfs_dst_0*ix + _stride_pdfs_dst_1*iy + _stride_pdfs_dst_2*iz + 8*_stride_pdfs_dst_3;
+   const int64_t srcIdx = _stride_pdfs_src_0*ix + _stride_pdfs_src_1*iy + _stride_pdfs_src_2*iz + 8*_stride_pdfs_src_3;
+   _data_pdfs_dst[dstIdx] = _data_pdfs_src[srcIdx];
+}
+}
+
+namespace internal_storagespecification_localCopy_NW {
+// Each thread handles the cell at its global (x, y, z) thread index and copies component 7 of that cell from _data_pdfs_src to _data_pdfs_dst.
+static FUNC_PREFIX void storagespecification_localCopy_NW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   const int64_t ix = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t iy = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t iz = blockDim.z*blockIdx.z + threadIdx.z;
+   // Threads outside the destination extent have nothing to copy.
+   if (ix >= _size_pdfs_dst_0 || iy >= _size_pdfs_dst_1 || iz >= _size_pdfs_dst_2) { return; }
+   const int64_t dstIdx = _stride_pdfs_dst_0*ix + _stride_pdfs_dst_1*iy + _stride_pdfs_dst_2*iz + 7*_stride_pdfs_dst_3;
+   const int64_t srcIdx = _stride_pdfs_src_0*ix + _stride_pdfs_src_1*iy + _stride_pdfs_src_2*iz + 7*_stride_pdfs_src_3;
+   _data_pdfs_dst[dstIdx] = _data_pdfs_src[srcIdx];
+}
+}
+
+namespace internal_storagespecification_localCopy_E {
+// Each thread handles the cell at its global (x, y, z) thread index and copies
+// components 4, 8, 10, 14 and 18 of that cell from _data_pdfs_src to _data_pdfs_dst.
+// Source and destination fields are addressed with their own, independent strides.
+static FUNC_PREFIX void storagespecification_localCopy_E(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   const int64_t ix = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t iy = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t iz = blockDim.z*blockIdx.z + threadIdx.z;
+   // Threads outside the destination extent have nothing to copy.
+   if (ix >= _size_pdfs_dst_0 || iy >= _size_pdfs_dst_1 || iz >= _size_pdfs_dst_2) { return; }
+
+   // Linear offsets of this cell (component 0) in the destination and the source field.
+   const int64_t dstCell = _stride_pdfs_dst_0*ix + _stride_pdfs_dst_1*iy + _stride_pdfs_dst_2*iz;
+   const int64_t srcCell = _stride_pdfs_src_0*ix + _stride_pdfs_src_1*iy + _stride_pdfs_src_2*iz;
+   const int64_t dstQ = _stride_pdfs_dst_3;
+   const int64_t srcQ = _stride_pdfs_src_3;
+
+   // One element is transferred per listed component.
+   _data_pdfs_dst[dstCell + 4*dstQ] = _data_pdfs_src[srcCell + 4*srcQ];
+   _data_pdfs_dst[dstCell + 8*dstQ] = _data_pdfs_src[srcCell + 8*srcQ];
+   _data_pdfs_dst[dstCell + 10*dstQ] = _data_pdfs_src[srcCell + 10*srcQ];
+   _data_pdfs_dst[dstCell + 14*dstQ] = _data_pdfs_src[srcCell + 14*srcQ];
+   _data_pdfs_dst[dstCell + 18*dstQ] = _data_pdfs_src[srcCell + 18*srcQ];
+}
+}
+
+namespace internal_storagespecification_localCopy_T {
+// Each thread handles the cell at its global (x, y, z) thread index and copies
+// components 5, 11, 12, 13 and 14 of that cell from _data_pdfs_src to _data_pdfs_dst.
+// Source and destination fields are addressed with their own, independent strides.
+static FUNC_PREFIX void storagespecification_localCopy_T(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   const int64_t ix = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t iy = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t iz = blockDim.z*blockIdx.z + threadIdx.z;
+   // Threads outside the destination extent have nothing to copy.
+   if (ix >= _size_pdfs_dst_0 || iy >= _size_pdfs_dst_1 || iz >= _size_pdfs_dst_2) { return; }
+
+   // Linear offsets of this cell (component 0) in the destination and the source field.
+   const int64_t dstCell = _stride_pdfs_dst_0*ix + _stride_pdfs_dst_1*iy + _stride_pdfs_dst_2*iz;
+   const int64_t srcCell = _stride_pdfs_src_0*ix + _stride_pdfs_src_1*iy + _stride_pdfs_src_2*iz;
+   const int64_t dstQ = _stride_pdfs_dst_3;
+   const int64_t srcQ = _stride_pdfs_src_3;
+
+   // One element is transferred per listed component.
+   _data_pdfs_dst[dstCell + 5*dstQ] = _data_pdfs_src[srcCell + 5*srcQ];
+   _data_pdfs_dst[dstCell + 11*dstQ] = _data_pdfs_src[srcCell + 11*srcQ];
+   _data_pdfs_dst[dstCell + 12*dstQ] = _data_pdfs_src[srcCell + 12*srcQ];
+   _data_pdfs_dst[dstCell + 13*dstQ] = _data_pdfs_src[srcCell + 13*srcQ];
+   _data_pdfs_dst[dstCell + 14*dstQ] = _data_pdfs_src[srcCell + 14*srcQ];
+}
+}
+
+namespace internal_storagespecification_localCopy_SW {
+// Each thread handles the cell at its global (x, y, z) thread index and copies component 9 of that cell from _data_pdfs_src to _data_pdfs_dst.
+static FUNC_PREFIX void storagespecification_localCopy_SW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   const int64_t ix = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t iy = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t iz = blockDim.z*blockIdx.z + threadIdx.z;
+   // Threads outside the destination extent have nothing to copy.
+   if (ix >= _size_pdfs_dst_0 || iy >= _size_pdfs_dst_1 || iz >= _size_pdfs_dst_2) { return; }
+   const int64_t dstIdx = _stride_pdfs_dst_0*ix + _stride_pdfs_dst_1*iy + _stride_pdfs_dst_2*iz + 9*_stride_pdfs_dst_3;
+   const int64_t srcIdx = _stride_pdfs_src_0*ix + _stride_pdfs_src_1*iy + _stride_pdfs_src_2*iz + 9*_stride_pdfs_src_3;
+   _data_pdfs_dst[dstIdx] = _data_pdfs_src[srcIdx];
+}
+}
+
+namespace internal_storagespecification_localCopy_BN {
+// Each thread handles the cell at its global (x, y, z) thread index and copies component 15 of that cell from _data_pdfs_src to _data_pdfs_dst.
+static FUNC_PREFIX void storagespecification_localCopy_BN(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   const int64_t ix = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t iy = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t iz = blockDim.z*blockIdx.z + threadIdx.z;
+   // Threads outside the destination extent have nothing to copy.
+   if (ix >= _size_pdfs_dst_0 || iy >= _size_pdfs_dst_1 || iz >= _size_pdfs_dst_2) { return; }
+   const int64_t dstIdx = _stride_pdfs_dst_0*ix + _stride_pdfs_dst_1*iy + _stride_pdfs_dst_2*iz + 15*_stride_pdfs_dst_3;
+   const int64_t srcIdx = _stride_pdfs_src_0*ix + _stride_pdfs_src_1*iy + _stride_pdfs_src_2*iz + 15*_stride_pdfs_src_3;
+   _data_pdfs_dst[dstIdx] = _data_pdfs_src[srcIdx];
+}
+}
+
+namespace internal_storagespecification_localCopy_SE {
+// Each thread handles the cell at its global (x, y, z) thread index and copies component 10 of that cell from _data_pdfs_src to _data_pdfs_dst.
+static FUNC_PREFIX void storagespecification_localCopy_SE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   const int64_t ix = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t iy = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t iz = blockDim.z*blockIdx.z + threadIdx.z;
+   // Threads outside the destination extent have nothing to copy.
+   if (ix >= _size_pdfs_dst_0 || iy >= _size_pdfs_dst_1 || iz >= _size_pdfs_dst_2) { return; }
+   const int64_t dstIdx = _stride_pdfs_dst_0*ix + _stride_pdfs_dst_1*iy + _stride_pdfs_dst_2*iz + 10*_stride_pdfs_dst_3;
+   const int64_t srcIdx = _stride_pdfs_src_0*ix + _stride_pdfs_src_1*iy + _stride_pdfs_src_2*iz + 10*_stride_pdfs_src_3;
+   _data_pdfs_dst[dstIdx] = _data_pdfs_src[srcIdx];
+}
+}
+
+namespace internal_storagespecification_localCopy_BE {
+// Each thread handles the cell at its global (x, y, z) thread index and copies component 18 of that cell from _data_pdfs_src to _data_pdfs_dst.
+static FUNC_PREFIX void storagespecification_localCopy_BE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   const int64_t ix = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t iy = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t iz = blockDim.z*blockIdx.z + threadIdx.z;
+   // Threads outside the destination extent have nothing to copy.
+   if (ix >= _size_pdfs_dst_0 || iy >= _size_pdfs_dst_1 || iz >= _size_pdfs_dst_2) { return; }
+   const int64_t dstIdx = _stride_pdfs_dst_0*ix + _stride_pdfs_dst_1*iy + _stride_pdfs_dst_2*iz + 18*_stride_pdfs_dst_3;
+   const int64_t srcIdx = _stride_pdfs_src_0*ix + _stride_pdfs_src_1*iy + _stride_pdfs_src_2*iz + 18*_stride_pdfs_src_3;
+   _data_pdfs_dst[dstIdx] = _data_pdfs_src[srcIdx];
+}
+}
+
+namespace internal_storagespecification_localCopy_BS {
+// Each thread handles the cell at its global (x, y, z) thread index and copies component 16 of that cell from _data_pdfs_src to _data_pdfs_dst.
+static FUNC_PREFIX void storagespecification_localCopy_BS(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   const int64_t ix = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t iy = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t iz = blockDim.z*blockIdx.z + threadIdx.z;
+   // Threads outside the destination extent have nothing to copy.
+   if (ix >= _size_pdfs_dst_0 || iy >= _size_pdfs_dst_1 || iz >= _size_pdfs_dst_2) { return; }
+   const int64_t dstIdx = _stride_pdfs_dst_0*ix + _stride_pdfs_dst_1*iy + _stride_pdfs_dst_2*iz + 16*_stride_pdfs_dst_3;
+   const int64_t srcIdx = _stride_pdfs_src_0*ix + _stride_pdfs_src_1*iy + _stride_pdfs_src_2*iz + 16*_stride_pdfs_src_3;
+   _data_pdfs_dst[dstIdx] = _data_pdfs_src[srcIdx];
+}
+}
+
+namespace internal_storagespecification_localCopy_BW {
+// Each thread handles the cell at its global (x, y, z) thread index and copies component 17 of that cell from _data_pdfs_src to _data_pdfs_dst.
+static FUNC_PREFIX void storagespecification_localCopy_BW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   const int64_t ix = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t iy = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t iz = blockDim.z*blockIdx.z + threadIdx.z;
+   // Threads outside the destination extent have nothing to copy.
+   if (ix >= _size_pdfs_dst_0 || iy >= _size_pdfs_dst_1 || iz >= _size_pdfs_dst_2) { return; }
+   const int64_t dstIdx = _stride_pdfs_dst_0*ix + _stride_pdfs_dst_1*iy + _stride_pdfs_dst_2*iz + 17*_stride_pdfs_dst_3;
+   const int64_t srcIdx = _stride_pdfs_src_0*ix + _stride_pdfs_src_1*iy + _stride_pdfs_src_2*iz + 17*_stride_pdfs_src_3;
+   _data_pdfs_dst[dstIdx] = _data_pdfs_src[srcIdx];
+}
+}
+
+namespace internal_storagespecification_localCopy_TN {
+// Each thread handles the cell at its global (x, y, z) thread index and copies component 11 of that cell from _data_pdfs_src to _data_pdfs_dst.
+static FUNC_PREFIX void storagespecification_localCopy_TN(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   const int64_t ix = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t iy = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t iz = blockDim.z*blockIdx.z + threadIdx.z;
+   // Threads outside the destination extent have nothing to copy.
+   if (ix >= _size_pdfs_dst_0 || iy >= _size_pdfs_dst_1 || iz >= _size_pdfs_dst_2) { return; }
+   const int64_t dstIdx = _stride_pdfs_dst_0*ix + _stride_pdfs_dst_1*iy + _stride_pdfs_dst_2*iz + 11*_stride_pdfs_dst_3;
+   const int64_t srcIdx = _stride_pdfs_src_0*ix + _stride_pdfs_src_1*iy + _stride_pdfs_src_2*iz + 11*_stride_pdfs_src_3;
+   _data_pdfs_dst[dstIdx] = _data_pdfs_src[srcIdx];
+}
+}
+
+namespace internal_storagespecification_localCopy_TE {
+// Each thread handles the cell at its global (x, y, z) thread index and copies component 14 of that cell from _data_pdfs_src to _data_pdfs_dst.
+static FUNC_PREFIX void storagespecification_localCopy_TE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   const int64_t ix = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t iy = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t iz = blockDim.z*blockIdx.z + threadIdx.z;
+   // Threads outside the destination extent have nothing to copy.
+   if (ix >= _size_pdfs_dst_0 || iy >= _size_pdfs_dst_1 || iz >= _size_pdfs_dst_2) { return; }
+   const int64_t dstIdx = _stride_pdfs_dst_0*ix + _stride_pdfs_dst_1*iy + _stride_pdfs_dst_2*iz + 14*_stride_pdfs_dst_3;
+   const int64_t srcIdx = _stride_pdfs_src_0*ix + _stride_pdfs_src_1*iy + _stride_pdfs_src_2*iz + 14*_stride_pdfs_src_3;
+   _data_pdfs_dst[dstIdx] = _data_pdfs_src[srcIdx];
+}
+}
+
+namespace internal_storagespecification_localCopy_TS {
+// Each thread handles the cell at its global (x, y, z) thread index and copies component 12 of that cell from _data_pdfs_src to _data_pdfs_dst.
+static FUNC_PREFIX void storagespecification_localCopy_TS(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   const int64_t ix = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t iy = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t iz = blockDim.z*blockIdx.z + threadIdx.z;
+   // Threads outside the destination extent have nothing to copy.
+   if (ix >= _size_pdfs_dst_0 || iy >= _size_pdfs_dst_1 || iz >= _size_pdfs_dst_2) { return; }
+   const int64_t dstIdx = _stride_pdfs_dst_0*ix + _stride_pdfs_dst_1*iy + _stride_pdfs_dst_2*iz + 12*_stride_pdfs_dst_3;
+   const int64_t srcIdx = _stride_pdfs_src_0*ix + _stride_pdfs_src_1*iy + _stride_pdfs_src_2*iz + 12*_stride_pdfs_src_3;
+   _data_pdfs_dst[dstIdx] = _data_pdfs_src[srcIdx];
+}
+}
+
+namespace internal_storagespecification_localCopy_N {
+// Per-cell copy of slots 1, 7, 8, 11 and 15 along the stride_3 axis from _data_pdfs_src to _data_pdfs_dst.
+static FUNC_PREFIX void storagespecification_localCopy_N(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   // One thread per destination cell; the global thread index is used directly as the cell coordinate.
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2)
+      return; // thread index lies outside the destination extent
+   // Element offsets of this cell at slot 0 of each array; further slots are stride_3 apart.
+   const int64_t dstCell = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   const int64_t srcCell = _stride_pdfs_src_0*x + _stride_pdfs_src_1*y + _stride_pdfs_src_2*z;
+   // Each slot is loaded from src and then stored to dst, in ascending slot order.
+   const double slot1 = _data_pdfs_src[srcCell + _stride_pdfs_src_3];
+   _data_pdfs_dst[dstCell + _stride_pdfs_dst_3] = slot1;
+   const double slot7 = _data_pdfs_src[srcCell + 7*_stride_pdfs_src_3];
+   _data_pdfs_dst[dstCell + 7*_stride_pdfs_dst_3] = slot7;
+   const double slot8 = _data_pdfs_src[srcCell + 8*_stride_pdfs_src_3];
+   _data_pdfs_dst[dstCell + 8*_stride_pdfs_dst_3] = slot8;
+   const double slot11 = _data_pdfs_src[srcCell + 11*_stride_pdfs_src_3];
+   _data_pdfs_dst[dstCell + 11*_stride_pdfs_dst_3] = slot11;
+   const double slot15 = _data_pdfs_src[srcCell + 15*_stride_pdfs_src_3];
+   _data_pdfs_dst[dstCell + 15*_stride_pdfs_dst_3] = slot15;
+}
+}
+
+namespace internal_storagespecification_localCopy_TW {
+static FUNC_PREFIX void storagespecification_localCopy_TW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   // One thread per destination cell: copies slot 13 along the stride_3 axis from _data_pdfs_src to _data_pdfs_dst.
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2)
+      return; // thread index lies outside the destination extent
+   const int64_t dstIdx = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z + 13*_stride_pdfs_dst_3;
+   const int64_t srcIdx = _stride_pdfs_src_0*x + _stride_pdfs_src_1*y + _stride_pdfs_src_2*z + 13*_stride_pdfs_src_3;
+   _data_pdfs_dst[dstIdx] = _data_pdfs_src[srcIdx];
+}
+}
+
+
+namespace internal_storagespecification_unpackRedistribute_TN { // Unpacks _data_buffer (19 doubles per entry) into _data_pdfs_dst; each entry feeds a 2x2x2 group of dst cells.
+static FUNC_PREFIX void storagespecification_unpackRedistribute_TN(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_dst_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_dst_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_dst_2) // only threads whose x, y and z indices are all even and inside the dst extent do any work
+   {
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
+      double * RESTRICT  _data_pdfs_dst_10_20_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2; // cell (ctr_0, ctr_1, ctr_2): all 19 slots are filled from the buffer entry
+      _data_pdfs_dst_10_20_30[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))]; // buffer entry is indexed by (ctr_0/2, ctr_1/2, ctr_2/2), x fastest, 19 consecutive doubles per entry
+      double * RESTRICT  _data_pdfs_dst_10_20_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_31[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_10_20_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_32[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_10_20_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_33[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_10_20_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_34[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_10_20_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_35[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_10_20_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_36[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_10_20_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_37[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_10_20_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_10_20_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_39[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_10_20_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_10_20_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_311[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_10_20_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_312[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_10_20_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_313[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_10_20_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_10_20_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_315[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_10_20_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_316[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_10_20_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_317[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_10_20_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      _data_pdfs_dst_10_20_30[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))]; // cell (ctr_0+1, ctr_1, ctr_2): receives the same 19 values from the same buffer entry
+      _data_pdfs_dst_10_20_31[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      _data_pdfs_dst_10_20_32[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      _data_pdfs_dst_10_20_33[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      _data_pdfs_dst_10_20_34[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      _data_pdfs_dst_10_20_35[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      _data_pdfs_dst_10_20_36[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      _data_pdfs_dst_10_20_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      _data_pdfs_dst_10_20_38[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      _data_pdfs_dst_10_20_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      _data_pdfs_dst_10_20_310[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      _data_pdfs_dst_10_20_311[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      _data_pdfs_dst_10_20_312[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      _data_pdfs_dst_10_20_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      _data_pdfs_dst_10_20_314[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      _data_pdfs_dst_10_20_315[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      _data_pdfs_dst_10_20_316[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      _data_pdfs_dst_10_20_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      _data_pdfs_dst_10_20_318[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_10_21_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 16*_stride_pdfs_dst_3; // remaining six cells of the group (ctr_1+1 and/or ctr_2+1, at ctr_0 and ctr_0+1): only slot 16 is written
+      _data_pdfs_dst_10_21_316[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16]; // NOTE(review): the +1 neighbour cells are not range-checked by the guard; presumably the dst sizes are even - confirm
+      double * RESTRICT  _data_pdfs_dst_11_20_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_316[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_11_21_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_316[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      _data_pdfs_dst_10_21_316[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      _data_pdfs_dst_11_20_316[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      _data_pdfs_dst_11_21_316[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+   } 
+}
+}
+
+namespace internal_storagespecification_unpackRedistribute_BW {
+static FUNC_PREFIX void storagespecification_unpackRedistribute_BW(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_dst_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_dst_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_dst_2)
+   {
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
+      double * RESTRICT  _data_pdfs_dst_10_21_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2;
+      _data_pdfs_dst_10_21_30[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];
+      double * RESTRICT  _data_pdfs_dst_10_21_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_31[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_10_21_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_32[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_10_21_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_33[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_10_21_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_34[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_10_21_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_35[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_10_21_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_36[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_10_21_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_10_21_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_38[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_10_21_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_10_21_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_310[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_10_21_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_311[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_10_21_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_312[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_10_21_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_10_21_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_314[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_10_21_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_315[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_10_21_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_316[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_10_21_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_10_21_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_318[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_11_21_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2;
+      _data_pdfs_dst_11_21_30[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];
+      double * RESTRICT  _data_pdfs_dst_11_21_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_31[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_11_21_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_32[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_11_21_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_33[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_11_21_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_34[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_11_21_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_35[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_11_21_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_36[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_11_21_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_11_21_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_38[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_11_21_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_11_21_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_310[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_11_21_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_311[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_11_21_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_312[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_11_21_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_11_21_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_314[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_11_21_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_315[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_11_21_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_316[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_11_21_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_11_21_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_318[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_10_20_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_314[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      _data_pdfs_dst_10_21_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      _data_pdfs_dst_10_20_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_11_20_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_314[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      _data_pdfs_dst_11_21_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      _data_pdfs_dst_11_20_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+   } 
+}
+}
+
+namespace internal_storagespecification_unpackRedistribute_S {
+static FUNC_PREFIX void storagespecification_unpackRedistribute_S(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_dst_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_dst_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_dst_2)
+   {
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
+      double * RESTRICT  _data_pdfs_dst_11_20_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2;
+      _data_pdfs_dst_11_20_30[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];
+      double * RESTRICT  _data_pdfs_dst_11_20_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_31[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_11_20_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_32[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_11_20_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_33[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_11_20_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_34[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_11_20_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_35[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_11_20_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_36[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_11_20_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_37[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_11_20_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_11_20_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_39[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_11_20_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_11_20_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_311[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_11_20_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_312[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_11_20_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_313[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_11_20_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_11_20_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_315[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_11_20_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_316[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_11_20_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_317[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_11_20_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_11_21_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2;
+      _data_pdfs_dst_11_21_30[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];
+      double * RESTRICT  _data_pdfs_dst_11_21_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_31[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_11_21_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_32[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_11_21_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_33[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_11_21_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_34[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_11_21_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_35[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_11_21_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_36[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_11_21_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_37[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_11_21_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_11_21_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_39[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_11_21_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_11_21_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_311[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_11_21_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_312[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_11_21_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_313[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_11_21_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_11_21_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_315[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_11_21_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_316[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_11_21_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_317[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_11_21_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      _data_pdfs_dst_11_20_30[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];
+      _data_pdfs_dst_11_20_31[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      _data_pdfs_dst_11_20_32[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      _data_pdfs_dst_11_20_33[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      _data_pdfs_dst_11_20_34[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      _data_pdfs_dst_11_20_35[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      _data_pdfs_dst_11_20_36[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      _data_pdfs_dst_11_20_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      _data_pdfs_dst_11_20_38[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      _data_pdfs_dst_11_20_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      _data_pdfs_dst_11_20_310[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      _data_pdfs_dst_11_20_311[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      _data_pdfs_dst_11_20_312[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      _data_pdfs_dst_11_20_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      _data_pdfs_dst_11_20_314[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      _data_pdfs_dst_11_20_315[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      _data_pdfs_dst_11_20_316[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      _data_pdfs_dst_11_20_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      _data_pdfs_dst_11_20_318[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      _data_pdfs_dst_11_21_30[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];
+      _data_pdfs_dst_11_21_31[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      _data_pdfs_dst_11_21_32[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      _data_pdfs_dst_11_21_33[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      _data_pdfs_dst_11_21_34[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      _data_pdfs_dst_11_21_35[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      _data_pdfs_dst_11_21_36[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      _data_pdfs_dst_11_21_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      _data_pdfs_dst_11_21_38[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      _data_pdfs_dst_11_21_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      _data_pdfs_dst_11_21_310[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      _data_pdfs_dst_11_21_311[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      _data_pdfs_dst_11_21_312[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      _data_pdfs_dst_11_21_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      _data_pdfs_dst_11_21_314[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      _data_pdfs_dst_11_21_315[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      _data_pdfs_dst_11_21_316[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      _data_pdfs_dst_11_21_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      _data_pdfs_dst_11_21_318[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_10_20_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_31[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_10_20_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_37[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_10_20_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_10_20_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_311[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_10_20_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_315[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_10_21_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_31[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_10_21_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_37[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_10_21_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_10_21_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_311[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_10_21_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_315[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      _data_pdfs_dst_10_20_31[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      _data_pdfs_dst_10_20_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      _data_pdfs_dst_10_20_38[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      _data_pdfs_dst_10_20_311[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      _data_pdfs_dst_10_20_315[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      _data_pdfs_dst_10_21_31[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      _data_pdfs_dst_10_21_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      _data_pdfs_dst_10_21_38[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      _data_pdfs_dst_10_21_311[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      _data_pdfs_dst_10_21_315[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+   } 
+}
+}
+
+namespace internal_storagespecification_unpackRedistribute_SW { // Wraps the "SW" unpack-and-redistribute kernel: copies packed buffer values into the pdfs_dst array using CUDA-style thread indexing.
+static FUNC_PREFIX void storagespecification_unpackRedistribute_SW(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) // _data_buffer: packed source (only read here); _data_pdfs_dst: destination array (only written here); _size_pdfs_dst_0..2: extents used by the guard (0 and 1 also shape the buffer index); _stride_pdfs_dst_0..2: element strides per coordinate; _stride_pdfs_dst_3: stride between the 19 per-cell entries.
+{ // Per active thread: one buffer slot of 19 values is spread over the 2x2x2 group of destination cells starting at (ctr_0, ctr_1, ctr_2). The two cells at (+1, +1, +0/+1) receive all 19 entries; the other six cells receive entry 8 only.
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_dst_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_dst_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_dst_2) // only threads whose x, y and z indices are all even and below the given extents do any work
+   { // NOTE(review): the guard bounds ctr_i only, while the writes below address ctr_i + 1 in every dimension - confirm callers pass even extents or that the array has room for the extra cell.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index in x, used as coordinate 0
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index in y, used as coordinate 1
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index in z, used as coordinate 2
+      double * RESTRICT  _data_pdfs_dst_11_20_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2; // pointer-name suffix _1a_2b_3q: offset a along coordinate 1, offset b along coordinate 2, entry q
+      _data_pdfs_dst_11_20_30[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))]; // cell (ctr_0+1, ctr_1+1, ctr_2), entry 0; buffer slot = (ctr_0/2, ctr_1/2, ctr_2/2) with 19 consecutive values per slot, coordinate 0 varying fastest
+      double * RESTRICT  _data_pdfs_dst_11_20_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_31[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_11_20_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_32[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_11_20_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_33[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_11_20_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_34[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_11_20_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_35[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_11_20_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_36[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_11_20_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_11_20_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3; // this entry-8 pointer is reused further down for the (ctr_0, ctr_1+1, ctr_2) cell
+      _data_pdfs_dst_11_20_38[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_11_20_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_11_20_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_310[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_11_20_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_311[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_11_20_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_312[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_11_20_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_11_20_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_314[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_11_20_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_315[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_11_20_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_316[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_11_20_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_11_20_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_318[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_11_21_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2; // from here: the same 19 entries again, one step further along coordinate 2, i.e. cell (ctr_0+1, ctr_1+1, ctr_2+1)
+      _data_pdfs_dst_11_21_30[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];
+      double * RESTRICT  _data_pdfs_dst_11_21_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_31[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_11_21_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_32[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_11_21_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_33[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_11_21_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_34[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_11_21_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_35[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_11_21_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_36[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_11_21_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_11_21_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 8*_stride_pdfs_dst_3; // this entry-8 pointer is reused further down for the (ctr_0, ctr_1+1, ctr_2+1) cell
+      _data_pdfs_dst_11_21_38[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_11_21_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_11_21_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_310[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_11_21_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_311[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_11_21_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_312[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_11_21_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_11_21_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_314[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_11_21_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_315[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_11_21_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_316[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_11_21_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_11_21_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_318[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_10_20_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3; // from here: the remaining six cells of the 2x2x2 group receive entry 8 only (presumably the SW direction of the 19-entry stencil - TODO confirm against the stencil ordering)
+      _data_pdfs_dst_10_20_38[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8]; // cell (ctr_0+1, ctr_1, ctr_2)
+      _data_pdfs_dst_11_20_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8]; // cell (ctr_0, ctr_1+1, ctr_2)
+      _data_pdfs_dst_10_20_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8]; // cell (ctr_0, ctr_1, ctr_2)
+      double * RESTRICT  _data_pdfs_dst_10_21_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_38[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8]; // cell (ctr_0+1, ctr_1, ctr_2+1)
+      _data_pdfs_dst_11_21_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8]; // cell (ctr_0, ctr_1+1, ctr_2+1)
+      _data_pdfs_dst_10_21_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8]; // cell (ctr_0, ctr_1, ctr_2+1)
+   } 
+}
+} // namespace internal_storagespecification_unpackRedistribute_SW
+
+namespace internal_storagespecification_unpackRedistribute_B {
+static FUNC_PREFIX void storagespecification_unpackRedistribute_B(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_dst_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_dst_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_dst_2)
+   {
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
+      double * RESTRICT  _data_pdfs_dst_10_21_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2;
+      _data_pdfs_dst_10_21_30[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];
+      double * RESTRICT  _data_pdfs_dst_10_21_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_31[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_10_21_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_32[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_10_21_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_33[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_10_21_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_34[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_10_21_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_35[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_10_21_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_36[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_10_21_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_37[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_10_21_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_10_21_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_39[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_10_21_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_10_21_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_311[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_10_21_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_312[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_10_21_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_313[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_10_21_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_10_21_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_315[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_10_21_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_316[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_10_21_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_317[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_10_21_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_11_21_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2;
+      _data_pdfs_dst_11_21_30[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];
+      double * RESTRICT  _data_pdfs_dst_11_21_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_31[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_11_21_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_32[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_11_21_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_33[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_11_21_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_34[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_11_21_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_35[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_11_21_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_36[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_11_21_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_37[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_11_21_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_11_21_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_39[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_11_21_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_11_21_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_311[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_11_21_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_312[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_11_21_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_313[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_11_21_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_11_21_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_315[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_11_21_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_316[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_11_21_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_317[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_11_21_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      _data_pdfs_dst_10_21_30[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];
+      _data_pdfs_dst_10_21_31[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      _data_pdfs_dst_10_21_32[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      _data_pdfs_dst_10_21_33[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      _data_pdfs_dst_10_21_34[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      _data_pdfs_dst_10_21_35[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      _data_pdfs_dst_10_21_36[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      _data_pdfs_dst_10_21_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      _data_pdfs_dst_10_21_38[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      _data_pdfs_dst_10_21_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      _data_pdfs_dst_10_21_310[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      _data_pdfs_dst_10_21_311[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      _data_pdfs_dst_10_21_312[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      _data_pdfs_dst_10_21_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      _data_pdfs_dst_10_21_314[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      _data_pdfs_dst_10_21_315[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      _data_pdfs_dst_10_21_316[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      _data_pdfs_dst_10_21_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      _data_pdfs_dst_10_21_318[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      _data_pdfs_dst_11_21_30[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];
+      _data_pdfs_dst_11_21_31[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      _data_pdfs_dst_11_21_32[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      _data_pdfs_dst_11_21_33[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      _data_pdfs_dst_11_21_34[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      _data_pdfs_dst_11_21_35[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      _data_pdfs_dst_11_21_36[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      _data_pdfs_dst_11_21_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      _data_pdfs_dst_11_21_38[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      _data_pdfs_dst_11_21_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      _data_pdfs_dst_11_21_310[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      _data_pdfs_dst_11_21_311[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      _data_pdfs_dst_11_21_312[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      _data_pdfs_dst_11_21_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      _data_pdfs_dst_11_21_314[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      _data_pdfs_dst_11_21_315[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      _data_pdfs_dst_11_21_316[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      _data_pdfs_dst_11_21_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      _data_pdfs_dst_11_21_318[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_10_20_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_35[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_10_20_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_311[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_10_20_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_312[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_10_20_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_313[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_10_20_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_11_20_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_35[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_11_20_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_311[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_11_20_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_312[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_11_20_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_313[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_11_20_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      _data_pdfs_dst_10_20_35[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      _data_pdfs_dst_10_20_311[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      _data_pdfs_dst_10_20_312[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      _data_pdfs_dst_10_20_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      _data_pdfs_dst_10_20_314[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      _data_pdfs_dst_11_20_35[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      _data_pdfs_dst_11_20_311[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      _data_pdfs_dst_11_20_312[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      _data_pdfs_dst_11_20_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      _data_pdfs_dst_11_20_314[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+   } 
+}
+}
+
+namespace internal_storagespecification_unpackRedistribute_E {
+static FUNC_PREFIX void storagespecification_unpackRedistribute_E(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_dst_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_dst_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_dst_2)
+   {
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
+      double * RESTRICT  _data_pdfs_dst_10_20_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2;
+      _data_pdfs_dst_10_20_30[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];
+      double * RESTRICT  _data_pdfs_dst_10_20_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_31[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_10_20_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_32[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_10_20_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_33[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_10_20_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_34[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_10_20_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_35[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_10_20_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_36[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_10_20_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_37[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_10_20_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_10_20_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_39[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_10_20_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_10_20_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_311[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_10_20_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_312[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_10_20_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_313[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_10_20_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_10_20_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_315[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_10_20_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_316[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_10_20_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_317[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_10_20_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_10_21_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2;
+      _data_pdfs_dst_10_21_30[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];
+      double * RESTRICT  _data_pdfs_dst_10_21_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_31[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_10_21_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_32[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_10_21_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_33[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_10_21_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_34[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_10_21_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_35[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_10_21_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_36[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_10_21_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_37[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_10_21_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_10_21_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_39[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_10_21_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_10_21_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_311[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_10_21_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_312[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_10_21_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_313[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_10_21_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_10_21_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_315[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_10_21_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_316[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_10_21_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_317[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_10_21_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_11_20_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2;
+      _data_pdfs_dst_11_20_30[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];
+      double * RESTRICT  _data_pdfs_dst_11_20_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_31[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_11_20_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_32[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_11_20_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_33[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_11_20_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_34[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_11_20_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_35[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_11_20_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_36[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_11_20_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_37[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_11_20_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_11_20_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_39[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_11_20_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_11_20_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_311[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_11_20_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_312[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_11_20_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_313[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_11_20_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_11_20_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_315[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_11_20_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_316[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_11_20_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_317[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_11_20_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_11_21_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2;
+      _data_pdfs_dst_11_21_30[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];
+      double * RESTRICT  _data_pdfs_dst_11_21_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_31[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_11_21_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_32[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_11_21_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_33[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_11_21_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_34[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_11_21_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_35[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_11_21_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_36[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_11_21_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_37[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_11_21_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_11_21_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_39[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_11_21_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_11_21_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_311[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_11_21_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_312[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_11_21_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_313[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_11_21_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_11_21_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_315[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_11_21_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_316[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_11_21_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_317[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_11_21_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      _data_pdfs_dst_10_20_33[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      _data_pdfs_dst_10_20_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      _data_pdfs_dst_10_20_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      _data_pdfs_dst_10_20_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      _data_pdfs_dst_10_20_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      _data_pdfs_dst_10_21_33[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      _data_pdfs_dst_10_21_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      _data_pdfs_dst_10_21_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      _data_pdfs_dst_10_21_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      _data_pdfs_dst_10_21_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      _data_pdfs_dst_11_20_33[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      _data_pdfs_dst_11_20_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      _data_pdfs_dst_11_20_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      _data_pdfs_dst_11_20_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      _data_pdfs_dst_11_20_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      _data_pdfs_dst_11_21_33[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      _data_pdfs_dst_11_21_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      _data_pdfs_dst_11_21_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      _data_pdfs_dst_11_21_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      _data_pdfs_dst_11_21_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+   } 
+}
+}
+
+namespace internal_storagespecification_unpackRedistribute_TE {
+static FUNC_PREFIX void storagespecification_unpackRedistribute_TE(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_dst_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_dst_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_dst_2)
+   {
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
+      double * RESTRICT  _data_pdfs_dst_10_20_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2;
+      _data_pdfs_dst_10_20_30[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];
+      double * RESTRICT  _data_pdfs_dst_10_20_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_31[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_10_20_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_32[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_10_20_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_33[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_10_20_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_34[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_10_20_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_35[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_10_20_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_36[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_10_20_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_37[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_10_20_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_10_20_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_39[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_10_20_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_10_20_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_311[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_10_20_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_312[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_10_20_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_313[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_10_20_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_10_20_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_315[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_10_20_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_316[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_10_20_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_317[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_10_20_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_11_20_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2;
+      _data_pdfs_dst_11_20_30[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];
+      double * RESTRICT  _data_pdfs_dst_11_20_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_31[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_11_20_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_32[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_11_20_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_33[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_11_20_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_34[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_11_20_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_35[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_11_20_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_36[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_11_20_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_37[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_11_20_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_11_20_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_39[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_11_20_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_11_20_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_311[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_11_20_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_312[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_11_20_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_313[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_11_20_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_11_20_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_315[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_11_20_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_316[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_11_20_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_317[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_11_20_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_10_21_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_317[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      _data_pdfs_dst_10_20_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      _data_pdfs_dst_10_21_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_11_21_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_317[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      _data_pdfs_dst_11_20_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      _data_pdfs_dst_11_21_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+   } 
+}
+}
+
+namespace internal_storagespecification_unpackRedistribute_NW { // Wraps the NW variant of the unpack-and-redistribute kernel.
+static FUNC_PREFIX void storagespecification_unpackRedistribute_NW(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) // Copies packed values (19 per buffer cell) from _data_buffer into the strided field _data_pdfs_dst; strides 0..2 step through the three spatial indices, stride 3 steps through the value index 0..18.
+{
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_dst_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_dst_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_dst_2) // Only threads whose x, y and z indices are all even and below the _size_pdfs_dst_* extents do work; each writes into the 2x2x2 cell block that starts at its own index. NOTE(review): stores below reach index+1 in every dimension while this guard only checks index < size - presumably the extents are always even; confirm with the caller.
+   {
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // x index of this thread (even, see guard)
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // y index of this thread (even, see guard)
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // z index of this thread (even, see guard)
+      double * RESTRICT  _data_pdfs_dst_10_20_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2; // Row pointers are named _1<dy>_2<dz>_3<value>: here dy = 0, dz = 0, value 0.
+      _data_pdfs_dst_10_20_30[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))]; // Source is buffer cell (ctr_0/2, ctr_1/2, ctr_2/2) in a (size_0/2) x (size_1/2) layout with 19 values per cell; values 0..18 are stored to cell (ctr_0+1, ctr_1, ctr_2) by this and the following 18 store statements.
+      double * RESTRICT  _data_pdfs_dst_10_20_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_31[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_10_20_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_32[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_10_20_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_33[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_10_20_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_34[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_10_20_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_35[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_10_20_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_36[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_10_20_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_10_20_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_38[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_10_20_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_10_20_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_310[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_10_20_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_311[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_10_20_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_312[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_10_20_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_10_20_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_314[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_10_20_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_315[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_10_20_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_316[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_10_20_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_10_20_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_318[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_10_21_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2; // From here on the row pointers carry one extra _stride_pdfs_dst_2, i.e. z index ctr_2+1.
+      _data_pdfs_dst_10_21_30[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))]; // The same 19 buffer values are stored a second time, now to cell (ctr_0+1, ctr_1, ctr_2+1).
+      double * RESTRICT  _data_pdfs_dst_10_21_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_31[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_10_21_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_32[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_10_21_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_33[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_10_21_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_34[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_10_21_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_35[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_10_21_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_36[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_10_21_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_10_21_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_38[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_10_21_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_10_21_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_310[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_10_21_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_311[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_10_21_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_312[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_10_21_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_10_21_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_314[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_10_21_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_315[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_10_21_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_316[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_10_21_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_10_21_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_318[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_11_20_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3; // Remaining statements: value 10 alone is also stored to the six cells of the 2x2x2 block not covered above (presumably index 10 is the PDF of the NW direction itself - TODO confirm against the stencil ordering).
+      _data_pdfs_dst_11_20_310[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10]; // cell (ctr_0+1, ctr_1+1, ctr_2)
+      _data_pdfs_dst_10_20_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10]; // cell (ctr_0, ctr_1, ctr_2)
+      _data_pdfs_dst_11_20_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10]; // cell (ctr_0, ctr_1+1, ctr_2)
+      double * RESTRICT  _data_pdfs_dst_11_21_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_310[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10]; // cell (ctr_0+1, ctr_1+1, ctr_2+1)
+      _data_pdfs_dst_10_21_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10]; // cell (ctr_0, ctr_1, ctr_2+1)
+      _data_pdfs_dst_11_21_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10]; // cell (ctr_0, ctr_1+1, ctr_2+1)
+   } 
+}
+}
+
+namespace internal_storagespecification_unpackRedistribute_T {
+static FUNC_PREFIX void storagespecification_unpackRedistribute_T(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_dst_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_dst_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_dst_2)
+   {
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
+      double * RESTRICT  _data_pdfs_dst_10_20_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2;
+      _data_pdfs_dst_10_20_30[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];
+      double * RESTRICT  _data_pdfs_dst_10_20_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_31[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_10_20_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_32[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_10_20_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_33[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_10_20_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_34[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_10_20_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_35[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_10_20_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_36[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_10_20_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_37[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_10_20_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_10_20_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_39[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_10_20_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_10_20_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_311[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_10_20_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_312[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_10_20_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_313[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_10_20_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_10_20_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_315[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_10_20_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_316[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_10_20_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_317[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_10_20_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_11_20_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2;
+      _data_pdfs_dst_11_20_30[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];
+      double * RESTRICT  _data_pdfs_dst_11_20_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_31[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_11_20_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_32[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_11_20_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_33[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_11_20_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_34[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_11_20_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_35[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_11_20_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_36[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_11_20_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_37[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_11_20_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_11_20_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_39[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_11_20_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_11_20_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_311[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_11_20_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_312[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_11_20_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_313[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_11_20_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_11_20_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_315[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_11_20_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_316[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_11_20_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_317[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_11_20_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      _data_pdfs_dst_10_20_30[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];
+      _data_pdfs_dst_10_20_31[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      _data_pdfs_dst_10_20_32[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      _data_pdfs_dst_10_20_33[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      _data_pdfs_dst_10_20_34[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      _data_pdfs_dst_10_20_35[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      _data_pdfs_dst_10_20_36[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      _data_pdfs_dst_10_20_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      _data_pdfs_dst_10_20_38[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      _data_pdfs_dst_10_20_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      _data_pdfs_dst_10_20_310[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      _data_pdfs_dst_10_20_311[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      _data_pdfs_dst_10_20_312[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      _data_pdfs_dst_10_20_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      _data_pdfs_dst_10_20_314[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      _data_pdfs_dst_10_20_315[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      _data_pdfs_dst_10_20_316[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      _data_pdfs_dst_10_20_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      _data_pdfs_dst_10_20_318[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      _data_pdfs_dst_11_20_30[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];
+      _data_pdfs_dst_11_20_31[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      _data_pdfs_dst_11_20_32[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      _data_pdfs_dst_11_20_33[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      _data_pdfs_dst_11_20_34[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      _data_pdfs_dst_11_20_35[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      _data_pdfs_dst_11_20_36[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      _data_pdfs_dst_11_20_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      _data_pdfs_dst_11_20_38[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      _data_pdfs_dst_11_20_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      _data_pdfs_dst_11_20_310[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      _data_pdfs_dst_11_20_311[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      _data_pdfs_dst_11_20_312[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      _data_pdfs_dst_11_20_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      _data_pdfs_dst_11_20_314[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      _data_pdfs_dst_11_20_315[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      _data_pdfs_dst_11_20_316[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      _data_pdfs_dst_11_20_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      _data_pdfs_dst_11_20_318[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_10_21_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_36[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_10_21_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_315[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_10_21_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_316[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_10_21_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_317[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_10_21_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_11_21_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_36[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_11_21_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_315[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_11_21_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_316[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_11_21_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_317[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_11_21_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      _data_pdfs_dst_10_21_36[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      _data_pdfs_dst_10_21_315[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      _data_pdfs_dst_10_21_316[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      _data_pdfs_dst_10_21_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      _data_pdfs_dst_10_21_318[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      _data_pdfs_dst_11_21_36[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      _data_pdfs_dst_11_21_315[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      _data_pdfs_dst_11_21_316[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      _data_pdfs_dst_11_21_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      _data_pdfs_dst_11_21_318[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+   } 
+}
+}
+
+namespace internal_storagespecification_unpackRedistribute_TW {
+static FUNC_PREFIX void storagespecification_unpackRedistribute_TW(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_dst_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_dst_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_dst_2)
+   {
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
+      double * RESTRICT  _data_pdfs_dst_10_20_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2;
+      _data_pdfs_dst_10_20_30[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];
+      double * RESTRICT  _data_pdfs_dst_10_20_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_31[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_10_20_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_32[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_10_20_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_33[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_10_20_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_34[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_10_20_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_35[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_10_20_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_36[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_10_20_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_10_20_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_38[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_10_20_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_10_20_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_310[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_10_20_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_311[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_10_20_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_312[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_10_20_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_10_20_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_314[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_10_20_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_315[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_10_20_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_316[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_10_20_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_10_20_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_318[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_11_20_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2;
+      _data_pdfs_dst_11_20_30[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];
+      double * RESTRICT  _data_pdfs_dst_11_20_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_31[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_11_20_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_32[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_11_20_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_33[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_11_20_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_34[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_11_20_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_35[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_11_20_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_36[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_11_20_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_11_20_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_38[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_11_20_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_11_20_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_310[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_11_20_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_311[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_11_20_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_312[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_11_20_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_11_20_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_314[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_11_20_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_315[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_11_20_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_316[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_11_20_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_11_20_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_318[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_10_21_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_318[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      _data_pdfs_dst_10_20_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      _data_pdfs_dst_10_21_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_11_21_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_318[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      _data_pdfs_dst_11_20_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      _data_pdfs_dst_11_21_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+   } 
+}
+}
+
+namespace internal_storagespecification_unpackRedistribute_W {
+static FUNC_PREFIX void storagespecification_unpackRedistribute_W(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_dst_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_dst_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_dst_2)
+   {
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
+      double * RESTRICT  _data_pdfs_dst_10_20_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2;
+      _data_pdfs_dst_10_20_30[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];
+      double * RESTRICT  _data_pdfs_dst_10_20_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_31[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_10_20_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_32[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_10_20_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_33[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_10_20_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_34[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_10_20_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_35[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_10_20_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_36[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_10_20_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_10_20_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_38[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_10_20_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_10_20_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_310[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_10_20_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_311[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_10_20_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_312[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_10_20_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_10_20_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_314[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_10_20_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_315[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_10_20_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_316[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_10_20_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_10_20_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_318[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_10_21_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2;
+      _data_pdfs_dst_10_21_30[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];
+      double * RESTRICT  _data_pdfs_dst_10_21_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_31[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_10_21_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_32[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_10_21_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_33[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_10_21_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_34[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_10_21_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_35[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_10_21_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_36[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_10_21_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_10_21_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_38[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_10_21_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_10_21_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_310[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_10_21_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_311[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_10_21_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_312[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_10_21_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_10_21_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_314[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_10_21_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_315[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_10_21_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_316[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_10_21_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_10_21_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_318[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_11_20_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2;
+      _data_pdfs_dst_11_20_30[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];
+      double * RESTRICT  _data_pdfs_dst_11_20_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_31[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_11_20_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_32[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_11_20_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_33[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_11_20_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_34[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_11_20_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_35[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_11_20_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_36[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_11_20_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_11_20_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_38[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_11_20_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_11_20_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_310[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_11_20_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_311[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_11_20_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_312[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_11_20_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_11_20_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_314[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_11_20_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_315[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_11_20_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_316[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_11_20_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_11_20_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_318[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_11_21_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2;
+      _data_pdfs_dst_11_21_30[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];
+      double * RESTRICT  _data_pdfs_dst_11_21_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_31[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_11_21_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_32[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_11_21_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_33[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_11_21_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_34[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_11_21_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_35[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_11_21_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_36[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_11_21_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_11_21_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_38[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_11_21_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_11_21_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_310[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_11_21_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_311[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_11_21_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_312[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_11_21_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_11_21_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_314[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_11_21_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_315[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_11_21_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_316[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_11_21_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_11_21_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_318[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      _data_pdfs_dst_10_20_34[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      _data_pdfs_dst_10_20_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      _data_pdfs_dst_10_20_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      _data_pdfs_dst_10_20_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      _data_pdfs_dst_10_20_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      _data_pdfs_dst_10_21_34[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      _data_pdfs_dst_10_21_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      _data_pdfs_dst_10_21_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      _data_pdfs_dst_10_21_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      _data_pdfs_dst_10_21_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      _data_pdfs_dst_11_20_34[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      _data_pdfs_dst_11_20_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      _data_pdfs_dst_11_20_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      _data_pdfs_dst_11_20_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      _data_pdfs_dst_11_20_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      _data_pdfs_dst_11_21_34[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      _data_pdfs_dst_11_21_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      _data_pdfs_dst_11_21_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      _data_pdfs_dst_11_21_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      _data_pdfs_dst_11_21_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+   } 
+}
+}
+
+namespace internal_storagespecification_unpackRedistribute_TS { // Scope for the TS variant of storagespecification_unpackRedistribute.
+static FUNC_PREFIX void storagespecification_unpackRedistribute_TS(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) // Copies values read from _data_buffer into _data_pdfs_dst, addressed via the four _stride_pdfs_dst_* values. FUNC_PREFIX and RESTRICT are macros defined outside this view (presumably __global__ / __restrict__ - confirm).
+{
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_dst_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_dst_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_dst_2) // Only threads whose global x, y and z indices are all even and below the matching _size_pdfs_dst_* do any work.
+   {
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // ctr_0, ctr_1, ctr_2: this thread's global index along x, y and z.
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
+      double * RESTRICT  _data_pdfs_dst_11_20_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2; // Base pointer for row (ctr_1 + 1, ctr_2) with no _stride_pdfs_dst_3 offset (index 0).
+      _data_pdfs_dst_11_20_30[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))]; // Source slot: 19 consecutive values per (ctr_0/2, ctr_1/2, ctr_2/2) triple, linearised with extents _size_pdfs_dst_0/2 and _size_pdfs_dst_1/2.
+      double * RESTRICT  _data_pdfs_dst_11_20_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3; // Same row, offset by k*_stride_pdfs_dst_3; the pairs below repeat this for k = 1..18, each reading buffer slot + k.
+      _data_pdfs_dst_11_20_31[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_11_20_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_32[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_11_20_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_33[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_11_20_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_34[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_11_20_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_35[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_11_20_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_36[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_11_20_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_37[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_11_20_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_11_20_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_39[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_11_20_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_11_20_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_311[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_11_20_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_312[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_11_20_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_313[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_11_20_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_11_20_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_315[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_11_20_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_316[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_11_20_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_317[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_11_20_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      _data_pdfs_dst_11_20_30[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))]; // Second pass over row (ctr_1 + 1, ctr_2): the same 19 buffer values are also stored one x-step further, at ctr_0 + 1.
+      _data_pdfs_dst_11_20_31[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      _data_pdfs_dst_11_20_32[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      _data_pdfs_dst_11_20_33[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      _data_pdfs_dst_11_20_34[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      _data_pdfs_dst_11_20_35[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      _data_pdfs_dst_11_20_36[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      _data_pdfs_dst_11_20_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      _data_pdfs_dst_11_20_38[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      _data_pdfs_dst_11_20_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      _data_pdfs_dst_11_20_310[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      _data_pdfs_dst_11_20_311[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      _data_pdfs_dst_11_20_312[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      _data_pdfs_dst_11_20_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      _data_pdfs_dst_11_20_314[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      _data_pdfs_dst_11_20_315[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      _data_pdfs_dst_11_20_316[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      _data_pdfs_dst_11_20_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      _data_pdfs_dst_11_20_318[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_11_21_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 15*_stride_pdfs_dst_3; // Index 15 only is additionally stored in three more rows; first row (ctr_1 + 1, ctr_2 + 1).
+      _data_pdfs_dst_11_21_315[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_10_20_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3; // Index 15, row (ctr_1, ctr_2).
+      _data_pdfs_dst_10_20_315[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_10_21_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 15*_stride_pdfs_dst_3; // Index 15, row (ctr_1, ctr_2 + 1).
+      _data_pdfs_dst_10_21_315[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      _data_pdfs_dst_11_21_315[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15]; // Index 15 again at ctr_0 + 1 for those three rows. NOTE(review): the +1 offsets in x, y and z are not checked by the guard above; presumably the extents are even or padded - confirm.
+      _data_pdfs_dst_10_20_315[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      _data_pdfs_dst_10_21_315[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+   } 
+}
+}
+
+namespace internal_storagespecification_unpackRedistribute_NE { // Unpack kernel: copies 19-value records from a packed buffer into the strided pdfs_dst array; presumably the coarse-to-fine redistribution for the NE direction in grid refinement -- TODO confirm against the generator.
+static FUNC_PREFIX void storagespecification_unpackRedistribute_NE(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) // _data_buffer: packed source, only read here; _data_pdfs_dst: destination base pointer; _size_pdfs_dst_0..2: extents used by the bounds guard and the buffer indexing; _stride_pdfs_dst_0..3: strides in units of doubles, stride 3 selects one of the 19 value slots.
+{
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_dst_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_dst_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_dst_2) // Only threads whose global x, y and z indices are all even and below the matching _size_pdfs_dst_* do any work; all other threads skip the body.
+   { // NOTE(review): the guard bounds-checks ctr_* only, yet the stores below also touch positions one extra stride further along each axis; presumably the caller guarantees even extents or a suitable layout -- confirm.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index along x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index along y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index along z
+      double * RESTRICT  _data_pdfs_dst_10_20_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2; // Part 1: store slots 0..18 at position (ctr_0, ctr_1, ctr_2).
+      _data_pdfs_dst_10_20_30[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))]; // Buffer record index is ((ctr_2/2)*(size_1/2) + ctr_1/2)*(size_0/2) + ctr_0/2 with 19 doubles per record; the trailing + q on later lines picks slot q.
+      double * RESTRICT  _data_pdfs_dst_10_20_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3; // Pointer naming: _1a_2b_3q is the base shifted by a extra strides along axis 1, b extra strides along axis 2, and q strides along axis 3 (the slot).
+      _data_pdfs_dst_10_20_31[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_10_20_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_32[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_10_20_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_33[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_10_20_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_34[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_10_20_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_35[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_10_20_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_36[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_10_20_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_37[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_10_20_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_10_20_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_39[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_10_20_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_10_20_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_311[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_10_20_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_312[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_10_20_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_313[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_10_20_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_10_20_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_315[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_10_20_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_316[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_10_20_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_317[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_10_20_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_10_21_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2; // Part 2: the same 19 slots again, one _stride_pdfs_dst_2 further, i.e. position (ctr_0, ctr_1, ctr_2+1).
+      _data_pdfs_dst_10_21_30[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];
+      double * RESTRICT  _data_pdfs_dst_10_21_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_31[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_10_21_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_32[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_10_21_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_33[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_10_21_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_34[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_10_21_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_35[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_10_21_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_36[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_10_21_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_37[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_10_21_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_10_21_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_39[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_10_21_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_10_21_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_311[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_10_21_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_312[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_10_21_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_313[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_10_21_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_10_21_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_315[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_10_21_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_316[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_10_21_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_317[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_10_21_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_11_20_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3; // Part 3: only slot 9 is replicated into the remaining six positions of the 2x2x2 block that starts at (ctr_0, ctr_1, ctr_2).
+      _data_pdfs_dst_11_20_39[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9]; // slot 9 at (ctr_0, ctr_1+1, ctr_2)
+      _data_pdfs_dst_10_20_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9]; // slot 9 at (ctr_0+1, ctr_1, ctr_2)
+      _data_pdfs_dst_11_20_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9]; // slot 9 at (ctr_0+1, ctr_1+1, ctr_2)
+      double * RESTRICT  _data_pdfs_dst_11_21_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_39[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9]; // slot 9 at (ctr_0, ctr_1+1, ctr_2+1)
+      _data_pdfs_dst_10_21_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9]; // slot 9 at (ctr_0+1, ctr_1, ctr_2+1)
+      _data_pdfs_dst_11_21_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9]; // slot 9 at (ctr_0+1, ctr_1+1, ctr_2+1)
+   } 
+}
+} // namespace internal_storagespecification_unpackRedistribute_NE
+
+namespace internal_storagespecification_unpackRedistribute_BN { // Unpack kernel: copies 19-value records from a packed buffer into the strided pdfs_dst array; presumably the coarse-to-fine redistribution for the BN direction in grid refinement -- TODO confirm against the generator.
+static FUNC_PREFIX void storagespecification_unpackRedistribute_BN(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) // _data_buffer: packed source, only read here; _data_pdfs_dst: destination base pointer; _size_pdfs_dst_0..2: extents used by the bounds guard and the buffer indexing; _stride_pdfs_dst_0..3: strides in units of doubles, stride 3 selects one of the 19 value slots.
+{
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_dst_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_dst_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_dst_2) // Only threads whose global x, y and z indices are all even and below the matching _size_pdfs_dst_* do any work; all other threads skip the body.
+   { // NOTE(review): the guard bounds-checks ctr_* only, yet the stores below also touch positions one extra stride further along each axis; presumably the caller guarantees even extents or a suitable layout -- confirm.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index along x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index along y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index along z
+      double * RESTRICT  _data_pdfs_dst_10_21_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2; // Part 1: store slots 0..18 at position (ctr_0, ctr_1, ctr_2+1); the pointer is shifted by one extra _stride_pdfs_dst_2.
+      _data_pdfs_dst_10_21_30[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))]; // Buffer record index is ((ctr_2/2)*(size_1/2) + ctr_1/2)*(size_0/2) + ctr_0/2 with 19 doubles per record; the trailing + q on later lines picks slot q.
+      double * RESTRICT  _data_pdfs_dst_10_21_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + _stride_pdfs_dst_3; // Pointer naming: _1a_2b_3q is the base shifted by a extra strides along axis 1, b extra strides along axis 2, and q strides along axis 3 (the slot).
+      _data_pdfs_dst_10_21_31[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_10_21_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_32[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_10_21_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_33[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_10_21_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_34[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_10_21_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_35[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_10_21_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_36[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_10_21_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_37[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_10_21_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_10_21_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_39[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_10_21_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_10_21_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_311[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_10_21_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_312[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_10_21_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_313[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_10_21_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_10_21_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_315[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_10_21_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_316[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_10_21_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_317[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_10_21_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      _data_pdfs_dst_10_21_30[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))]; // Part 2: the same 19 slots again, one _stride_pdfs_dst_0 further, i.e. position (ctr_0+1, ctr_1, ctr_2+1); the pointers from part 1 are reused.
+      _data_pdfs_dst_10_21_31[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      _data_pdfs_dst_10_21_32[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      _data_pdfs_dst_10_21_33[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      _data_pdfs_dst_10_21_34[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      _data_pdfs_dst_10_21_35[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      _data_pdfs_dst_10_21_36[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      _data_pdfs_dst_10_21_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      _data_pdfs_dst_10_21_38[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      _data_pdfs_dst_10_21_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      _data_pdfs_dst_10_21_310[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      _data_pdfs_dst_10_21_311[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      _data_pdfs_dst_10_21_312[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      _data_pdfs_dst_10_21_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      _data_pdfs_dst_10_21_314[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      _data_pdfs_dst_10_21_315[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      _data_pdfs_dst_10_21_316[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      _data_pdfs_dst_10_21_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      _data_pdfs_dst_10_21_318[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_10_20_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3; // Part 3: only slot 12 is replicated into the remaining six positions of the 2x2x2 block that starts at (ctr_0, ctr_1, ctr_2).
+      _data_pdfs_dst_10_20_312[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12]; // slot 12 at (ctr_0, ctr_1, ctr_2)
+      double * RESTRICT  _data_pdfs_dst_11_21_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_312[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12]; // slot 12 at (ctr_0, ctr_1+1, ctr_2+1)
+      double * RESTRICT  _data_pdfs_dst_11_20_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_312[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12]; // slot 12 at (ctr_0, ctr_1+1, ctr_2)
+      _data_pdfs_dst_10_20_312[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12]; // slot 12 at (ctr_0+1, ctr_1, ctr_2)
+      _data_pdfs_dst_11_21_312[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12]; // slot 12 at (ctr_0+1, ctr_1+1, ctr_2+1)
+      _data_pdfs_dst_11_20_312[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12]; // slot 12 at (ctr_0+1, ctr_1+1, ctr_2)
+   } 
+}
+} // namespace internal_storagespecification_unpackRedistribute_BN
+
+namespace internal_storagespecification_unpackRedistribute_N {  // Device code that replicates each 19-value buffer entry into a block of destination cells spanning two cells per axis (details on the section notes below).
+static FUNC_PREFIX void storagespecification_unpackRedistribute_N(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)  // _data_buffer: only read here, 19 consecutive doubles per entry; _data_pdfs_dst: only written here, addressed through strides 0..3 (x, y, z, component); _size_pdfs_dst_0..2: extents used by the guard and by the buffer indexing.
+{  // FUNC_PREFIX and RESTRICT are macros defined outside this view - presumably the CUDA kernel qualifier and a restrict keyword; TODO confirm.
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_dst_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_dst_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_dst_2)  // Only threads whose global x, y and z indices are all even and lie inside the three extents do any work; every other thread falls through.
+   {  // NOTE(review): the guard bounds only ctr_0/1/2 themselves; the stores at +1 offsets below are not checked, so they appear to rely on even extents or padding in the destination - confirm with the caller.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;  // global thread index along x (even inside this branch)
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;  // global thread index along y (even inside this branch)
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;  // global thread index along z (even inside this branch)
+      double * RESTRICT  _data_pdfs_dst_10_20_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2;  // Section 1: cell (ctr_0, ctr_1, ctr_2), components 0..18. Every store in this kernel reads buffer index 19*((size_0/2)*(size_1/2)*(ctr_2/2) + (size_0/2)*(ctr_1/2) + ctr_0/2) + component.
+      _data_pdfs_dst_10_20_30[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];
+      double * RESTRICT  _data_pdfs_dst_10_20_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_31[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_10_20_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_32[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_10_20_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_33[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_10_20_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_34[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_10_20_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_35[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_10_20_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_36[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_10_20_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_37[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_10_20_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_10_20_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_39[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_10_20_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_10_20_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_311[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_10_20_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_312[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_10_20_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_313[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_10_20_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_10_20_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_315[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_10_20_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_316[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_10_20_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_317[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_10_20_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_10_21_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2;  // Section 2: cell (ctr_0, ctr_1, ctr_2 + 1), components 0..18, same buffer values as section 1.
+      _data_pdfs_dst_10_21_30[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];
+      double * RESTRICT  _data_pdfs_dst_10_21_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_31[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_10_21_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_32[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_10_21_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_33[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_10_21_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_34[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_10_21_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_35[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_10_21_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_36[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_10_21_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_37[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_10_21_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_10_21_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_39[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_10_21_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_10_21_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_311[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_10_21_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_312[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_10_21_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_313[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_10_21_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_10_21_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_315[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_10_21_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_316[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_10_21_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_317[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_10_21_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      _data_pdfs_dst_10_20_30[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];  // Section 3: cell (ctr_0 + 1, ctr_1, ctr_2), components 0..18, reusing the row pointers declared in section 1.
+      _data_pdfs_dst_10_20_31[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      _data_pdfs_dst_10_20_32[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      _data_pdfs_dst_10_20_33[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      _data_pdfs_dst_10_20_34[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      _data_pdfs_dst_10_20_35[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      _data_pdfs_dst_10_20_36[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      _data_pdfs_dst_10_20_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      _data_pdfs_dst_10_20_38[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      _data_pdfs_dst_10_20_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      _data_pdfs_dst_10_20_310[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      _data_pdfs_dst_10_20_311[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      _data_pdfs_dst_10_20_312[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      _data_pdfs_dst_10_20_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      _data_pdfs_dst_10_20_314[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      _data_pdfs_dst_10_20_315[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      _data_pdfs_dst_10_20_316[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      _data_pdfs_dst_10_20_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      _data_pdfs_dst_10_20_318[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      _data_pdfs_dst_10_21_30[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];  // Section 4: cell (ctr_0 + 1, ctr_1, ctr_2 + 1), components 0..18, reusing the row pointers declared in section 2.
+      _data_pdfs_dst_10_21_31[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      _data_pdfs_dst_10_21_32[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      _data_pdfs_dst_10_21_33[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      _data_pdfs_dst_10_21_34[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      _data_pdfs_dst_10_21_35[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      _data_pdfs_dst_10_21_36[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      _data_pdfs_dst_10_21_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      _data_pdfs_dst_10_21_38[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      _data_pdfs_dst_10_21_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      _data_pdfs_dst_10_21_310[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      _data_pdfs_dst_10_21_311[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      _data_pdfs_dst_10_21_312[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      _data_pdfs_dst_10_21_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      _data_pdfs_dst_10_21_314[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      _data_pdfs_dst_10_21_315[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      _data_pdfs_dst_10_21_316[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      _data_pdfs_dst_10_21_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      _data_pdfs_dst_10_21_318[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_11_20_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3;  // Section 5: cell (ctr_0, ctr_1 + 1, ctr_2), but only components 2, 9, 10, 12 and 16 - presumably the stencil directions that matter for the row at ctr_1 + 1 in this variant; TODO confirm against the stencil ordering.
+      _data_pdfs_dst_11_20_32[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_11_20_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_39[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_11_20_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_11_20_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_312[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_11_20_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_316[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_11_21_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 2*_stride_pdfs_dst_3;  // Section 6: cell (ctr_0, ctr_1 + 1, ctr_2 + 1), components 2, 9, 10, 12 and 16.
+      _data_pdfs_dst_11_21_32[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_11_21_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_39[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_11_21_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_11_21_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_312[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_11_21_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_316[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      _data_pdfs_dst_11_20_32[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];  // Section 7: cell (ctr_0 + 1, ctr_1 + 1, ctr_2), components 2, 9, 10, 12 and 16.
+      _data_pdfs_dst_11_20_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      _data_pdfs_dst_11_20_310[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      _data_pdfs_dst_11_20_312[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      _data_pdfs_dst_11_20_316[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      _data_pdfs_dst_11_21_32[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];  // Section 8: cell (ctr_0 + 1, ctr_1 + 1, ctr_2 + 1), components 2, 9, 10, 12 and 16.
+      _data_pdfs_dst_11_21_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      _data_pdfs_dst_11_21_310[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      _data_pdfs_dst_11_21_312[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      _data_pdfs_dst_11_21_316[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+   } 
+}
+}
+
+namespace internal_storagespecification_unpackRedistribute_BS { // Holds the "BS" variant of the unpackRedistribute routine. NOTE(review): "BS" presumably names a neighbour direction - confirm against the code that emits these kernels.
+static FUNC_PREFIX void storagespecification_unpackRedistribute_BS(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) // Copies 19-value records from _data_buffer into _data_pdfs_dst. _size_*_0/1/2 bound the three thread-indexed axes, _stride_*_0/1/2 step along them, _stride_*_3 steps between the 19 value slots of one cell. FUNC_PREFIX and RESTRICT are macros defined outside this view.
+{ // No loops: each qualifying thread reads one 19-value record and stores it to several destination cells.
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_dst_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_dst_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_dst_2) // Guard: only threads whose three global indices are all even and below the given sizes do any work.
+   { // NOTE(review): the stores below also touch index+1 along every axis, which is not covered by the guard; this assumes the destination has room there (e.g. even sizes or padding) - confirm with the caller.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index along axis 0
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index along axis 1
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index along axis 2
+      double * RESTRICT  _data_pdfs_dst_11_21_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2; // Row base for (ctr_1+1, ctr_2+1), value slot 0; the axis-0 offset is applied in the subscript of each store.
+      _data_pdfs_dst_11_21_30[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))]; // Source record index is built from ctr/2 per axis with extents size_0/2 and size_1/2; records are 19 doubles wide.
+      double * RESTRICT  _data_pdfs_dst_11_21_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + _stride_pdfs_dst_3; // Same row, value slot 1; the declare-then-store pattern repeats for slots 2..18.
+      _data_pdfs_dst_11_21_31[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_11_21_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_32[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_11_21_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_33[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_11_21_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_34[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_11_21_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_35[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_11_21_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_36[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_11_21_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_37[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_11_21_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_11_21_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_39[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_11_21_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_11_21_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_311[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_11_21_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_312[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_11_21_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_313[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_11_21_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_11_21_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_315[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_11_21_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_316[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_11_21_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_317[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_11_21_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      _data_pdfs_dst_11_21_30[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))]; // Second pass: the same 19 values are stored again in the same row, one step further along axis 0 (ctr_0+1).
+      _data_pdfs_dst_11_21_31[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      _data_pdfs_dst_11_21_32[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      _data_pdfs_dst_11_21_33[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      _data_pdfs_dst_11_21_34[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      _data_pdfs_dst_11_21_35[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      _data_pdfs_dst_11_21_36[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      _data_pdfs_dst_11_21_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      _data_pdfs_dst_11_21_38[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      _data_pdfs_dst_11_21_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      _data_pdfs_dst_11_21_310[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      _data_pdfs_dst_11_21_311[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      _data_pdfs_dst_11_21_312[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      _data_pdfs_dst_11_21_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      _data_pdfs_dst_11_21_314[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      _data_pdfs_dst_11_21_315[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      _data_pdfs_dst_11_21_316[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      _data_pdfs_dst_11_21_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      _data_pdfs_dst_11_21_318[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_11_20_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3; // Value slot 11 only: it is additionally stored to the three remaining rows, starting with (ctr_1+1, ctr_2).
+      _data_pdfs_dst_11_20_311[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_10_21_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 11*_stride_pdfs_dst_3; // Row (ctr_1, ctr_2+1), slot 11.
+      _data_pdfs_dst_10_21_311[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_10_20_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3; // Row (ctr_1, ctr_2), slot 11.
+      _data_pdfs_dst_10_20_311[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      _data_pdfs_dst_11_20_311[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11]; // Same three rows at ctr_0+1; together with the passes above, slot 11 reaches all eight cells of the 2x2x2 group.
+      _data_pdfs_dst_10_21_311[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      _data_pdfs_dst_10_20_311[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+   } // end of the guarded per-thread body
+}
+} // namespace internal_storagespecification_unpackRedistribute_BS
+
+namespace internal_storagespecification_unpackRedistribute_BE { // Holds the "BE" variant of the unpackRedistribute routine. NOTE(review): "BE" presumably names a neighbour direction - confirm against the code that emits these kernels.
+static FUNC_PREFIX void storagespecification_unpackRedistribute_BE(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) // Copies 19-value records from _data_buffer into _data_pdfs_dst. _size_*_0/1/2 bound the three thread-indexed axes, _stride_*_0/1/2 step along them, _stride_*_3 steps between the 19 value slots of one cell. FUNC_PREFIX and RESTRICT are macros defined outside this view.
+{ // No loops: each qualifying thread reads one 19-value record and stores it to several destination cells.
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_dst_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_dst_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_dst_2) // Guard: only threads whose three global indices are all even and below the given sizes do any work.
+   { // NOTE(review): the stores below also touch index+1 along every axis, which is not covered by the guard; this assumes the destination has room there (e.g. even sizes or padding) - confirm with the caller.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index along axis 0
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index along axis 1
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index along axis 2
+      double * RESTRICT  _data_pdfs_dst_10_21_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2; // Row base for (ctr_1, ctr_2+1), value slot 0; the axis-0 offset is applied in the subscript of each store.
+      _data_pdfs_dst_10_21_30[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))]; // Source record index is built from ctr/2 per axis with extents size_0/2 and size_1/2; records are 19 doubles wide.
+      double * RESTRICT  _data_pdfs_dst_10_21_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + _stride_pdfs_dst_3; // Same row, value slot 1; the declare-then-store pattern repeats for slots 2..18.
+      _data_pdfs_dst_10_21_31[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_10_21_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_32[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_10_21_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_33[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_10_21_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_34[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_10_21_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_35[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_10_21_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_36[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_10_21_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_37[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_10_21_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_10_21_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_39[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_10_21_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_10_21_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_311[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_10_21_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_312[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_10_21_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_313[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_10_21_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_10_21_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_315[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_10_21_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_316[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_10_21_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_317[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_10_21_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_11_21_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2; // Second pass: the same 19 values are stored again one step further along axis 1, in row (ctr_1+1, ctr_2+1), still at ctr_0.
+      _data_pdfs_dst_11_21_30[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];
+      double * RESTRICT  _data_pdfs_dst_11_21_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_31[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_11_21_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_32[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_11_21_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_33[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_11_21_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_34[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_11_21_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_35[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_11_21_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_36[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_11_21_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_37[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_11_21_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_11_21_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_39[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_11_21_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_11_21_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_311[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_11_21_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_312[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_11_21_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_313[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_11_21_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_11_21_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_315[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_11_21_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_316[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_11_21_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_317[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_11_21_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_10_20_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3; // Value slot 13 only: it is additionally stored to the six cells not covered above, starting with row (ctr_1, ctr_2) at ctr_0.
+      _data_pdfs_dst_10_20_313[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      _data_pdfs_dst_10_21_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13]; // Row (ctr_1, ctr_2+1) at ctr_0+1.
+      _data_pdfs_dst_10_20_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13]; // Row (ctr_1, ctr_2) at ctr_0+1.
+      double * RESTRICT  _data_pdfs_dst_11_20_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3; // Row (ctr_1+1, ctr_2), slot 13.
+      _data_pdfs_dst_11_20_313[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      _data_pdfs_dst_11_21_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13]; // Row (ctr_1+1, ctr_2+1) at ctr_0+1.
+      _data_pdfs_dst_11_20_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13]; // Row (ctr_1+1, ctr_2) at ctr_0+1; with this store slot 13 has reached all eight cells of the 2x2x2 group.
+   } // end of the guarded per-thread body
+}
+} // namespace internal_storagespecification_unpackRedistribute_BE
+
+namespace internal_storagespecification_unpackRedistribute_SE { // Kernel that scatters 19-value records from _data_buffer into _data_pdfs_dst, one record per 2x2x2 cell group.
+static FUNC_PREFIX void storagespecification_unpackRedistribute_SE(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{ // Reads _data_buffer and writes _data_pdfs_dst; strides 0..2 address the cell (x, y, z), stride 3 selects the value slot inside a cell.
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_dst_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_dst_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_dst_2)
+   { // Reached only by threads whose global x, y and z indices are all even and below _size_pdfs_dst_0/1/2.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // x index of this thread's group origin
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // y index of this thread's group origin
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // z index of this thread's group origin
+      double * RESTRICT  _data_pdfs_dst_11_20_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2; // Slots 0..18 of cell (ctr_0, ctr_1+1, ctr_2) follow: one row pointer plus one store per slot.
+      _data_pdfs_dst_11_20_30[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))]; // Record start = 19 * (ctr/2 linearised with extents _size_pdfs_dst_0/2 and _size_pdfs_dst_1/2); later stores add "+ k" to pick slot k.
+      double * RESTRICT  _data_pdfs_dst_11_20_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_31[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_11_20_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_32[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_11_20_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_33[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_11_20_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_34[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_11_20_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_35[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_11_20_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_36[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_11_20_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_37[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_11_20_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_11_20_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_39[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_11_20_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_11_20_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_311[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_11_20_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_312[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_11_20_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_313[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_11_20_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_11_20_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_315[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_11_20_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_316[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_11_20_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_317[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_11_20_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_11_21_30 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2; // The same 19 slots are now written to cell (ctr_0, ctr_1+1, ctr_2+1) from the same record.
+      _data_pdfs_dst_11_21_30[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2))];
+      double * RESTRICT  _data_pdfs_dst_11_21_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + _stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_31[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 1];
+      double * RESTRICT  _data_pdfs_dst_11_21_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 2*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_32[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 2];
+      double * RESTRICT  _data_pdfs_dst_11_21_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_33[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 3];
+      double * RESTRICT  _data_pdfs_dst_11_21_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 4*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_34[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 4];
+      double * RESTRICT  _data_pdfs_dst_11_21_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 5*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_35[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 5];
+      double * RESTRICT  _data_pdfs_dst_11_21_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 6*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_36[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 6];
+      double * RESTRICT  _data_pdfs_dst_11_21_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_37[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7];
+      double * RESTRICT  _data_pdfs_dst_11_21_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 8*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_38[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 8];
+      double * RESTRICT  _data_pdfs_dst_11_21_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_39[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 9];
+      double * RESTRICT  _data_pdfs_dst_11_21_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 10*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_310[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 10];
+      double * RESTRICT  _data_pdfs_dst_11_21_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 11*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_311[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 11];
+      double * RESTRICT  _data_pdfs_dst_11_21_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 12*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_312[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 12];
+      double * RESTRICT  _data_pdfs_dst_11_21_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_313[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 13];
+      double * RESTRICT  _data_pdfs_dst_11_21_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 14*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_314[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 14];
+      double * RESTRICT  _data_pdfs_dst_11_21_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 15*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_315[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 15];
+      double * RESTRICT  _data_pdfs_dst_11_21_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 16*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_316[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 16];
+      double * RESTRICT  _data_pdfs_dst_11_21_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_317[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 17];
+      double * RESTRICT  _data_pdfs_dst_11_21_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 18*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_21_318[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 18];
+      double * RESTRICT  _data_pdfs_dst_10_20_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3; // From here on only slot 7 is written, to the six remaining cells of the 2x2x2 group.
+      _data_pdfs_dst_10_20_37[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7]; // slot 7 -> cell (ctr_0, ctr_1, ctr_2)
+      _data_pdfs_dst_11_20_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7]; // slot 7 -> cell (ctr_0+1, ctr_1+1, ctr_2)
+      _data_pdfs_dst_10_20_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7]; // slot 7 -> cell (ctr_0+1, ctr_1, ctr_2)
+      double * RESTRICT  _data_pdfs_dst_10_21_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_37[_stride_pdfs_dst_0*ctr_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7]; // slot 7 -> cell (ctr_0, ctr_1, ctr_2+1)
+      _data_pdfs_dst_11_21_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7]; // slot 7 -> cell (ctr_0+1, ctr_1+1, ctr_2+1)
+      _data_pdfs_dst_10_21_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = _data_buffer[19*((_size_pdfs_dst_0) / (2))*((_size_pdfs_dst_1) / (2))*((ctr_2) / (2)) + 19*((_size_pdfs_dst_0) / (2))*((ctr_1) / (2)) + 19*((ctr_0) / (2)) + 7]; // slot 7 -> cell (ctr_0+1, ctr_1, ctr_2+1)
+   } // NOTE(review): stores reach index ctr+1 along every axis although the guard only checks ctr < size; presumably the sizes are even - confirm against the caller.
+}
+} // namespace internal_storagespecification_unpackRedistribute_SE
+
+namespace internal_storagespecification_packPartialCoalescence_N { // Kernel that packs 5 values per 2x2x2 cell group into _data_buffer, each a mask-gated sum of 8 pdfs_src values.
+static FUNC_PREFIX void storagespecification_packPartialCoalescence_N(double * RESTRICT  _data_buffer, uint32_t * RESTRICT const _data_mask, double * RESTRICT const _data_pdfs_src, int64_t const _size_mask_0, int64_t const _size_mask_1, int64_t const _size_mask_2, int64_t const _stride_mask_0, int64_t const _stride_mask_1, int64_t const _stride_mask_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{ // Writes _data_buffer; reads _data_mask (one uint32_t per cell, addressed by strides 0..2) and _data_pdfs_src (strides 0..2 address the cell, stride 3 selects the value slot).
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_mask_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_mask_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_mask_2)
+   { // Reached only by threads whose global x, y and z indices are all even and below _size_mask_0/1/2.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // x index of this thread's group origin
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // y index of this thread's group origin
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // z index of this thread's group origin
+      uint32_t * RESTRICT _data_mask_10_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2; // Mask row pointers _data_mask_1a_2b address y = ctr_1 + a, z = ctr_2 + b.
+      double * RESTRICT _data_pdfs_src_1m1_20_31 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 - _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_3; // Source row pointers: "m1" in the name marks an offset of -1, the trailing _3k is slot k.
+      uint32_t * RESTRICT _data_mask_11_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2;
+      double * RESTRICT _data_pdfs_src_10_20_31 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_3;
+      uint32_t * RESTRICT _data_mask_10_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2 + _stride_mask_2;
+      double * RESTRICT _data_pdfs_src_1m1_21_31 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 - _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + _stride_pdfs_src_3;
+      uint32_t * RESTRICT _data_mask_11_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2 + _stride_mask_2;
+      double * RESTRICT _data_pdfs_src_10_21_31 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + _stride_pdfs_src_3;
+      _data_buffer[5*((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + 5*((_size_mask_0) / (2))*((ctr_1) / (2)) + 5*((ctr_0) / (2))] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (1) & 1) * (_data_pdfs_src_1m1_20_31[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (1) & 1) * (_data_pdfs_src_1m1_20_31[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (1) & 1) * (_data_pdfs_src_1m1_21_31[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (1) & 1) * (_data_pdfs_src_1m1_21_31[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (1) & 1) * (_data_pdfs_src_10_20_31[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (1) & 1) * (_data_pdfs_src_10_20_31[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (1) & 1) * (_data_pdfs_src_10_21_31[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (1) & 1) * (_data_pdfs_src_10_21_31[_stride_pdfs_src_0*ctr_0]); // Record value 0 (slot 1): for each of the 8 group cells, bit 1 of its mask (0 or 1) times the slot-1 value at (x, y-1, z) relative to that cell; the 8 products are summed.
+      double * RESTRICT _data_pdfs_src_1m1_20_37 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 - _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_10_20_37 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_1m1_21_37 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 - _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 7*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_10_21_37 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 7*_stride_pdfs_src_3;
+      _data_buffer[5*((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + 5*((_size_mask_0) / (2))*((ctr_1) / (2)) + 5*((ctr_0) / (2)) + 1] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (7) & 1) * (_data_pdfs_src_1m1_20_37[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (7) & 1) * (_data_pdfs_src_1m1_20_37[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (7) & 1) * (_data_pdfs_src_1m1_21_37[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (7) & 1) * (_data_pdfs_src_1m1_21_37[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (7) & 1) * (_data_pdfs_src_10_20_37[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (7) & 1) * (_data_pdfs_src_10_20_37[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (7) & 1) * (_data_pdfs_src_10_21_37[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (7) & 1) * (_data_pdfs_src_10_21_37[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]); // Record value 1 (slot 7): gated by mask bit 7; source cell is (x+1, y-1, z) relative to the mask cell.
+      double * RESTRICT _data_pdfs_src_1m1_20_38 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 - _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_10_20_38 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_1m1_21_38 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 - _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 8*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_10_21_38 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 8*_stride_pdfs_src_3;
+      _data_buffer[5*((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + 5*((_size_mask_0) / (2))*((ctr_1) / (2)) + 5*((ctr_0) / (2)) + 2] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (8) & 1) * (_data_pdfs_src_1m1_20_38[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (8) & 1) * (_data_pdfs_src_1m1_20_38[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (8) & 1) * (_data_pdfs_src_1m1_21_38[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (8) & 1) * (_data_pdfs_src_1m1_21_38[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (8) & 1) * (_data_pdfs_src_10_20_38[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (8) & 1) * (_data_pdfs_src_10_20_38[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (8) & 1) * (_data_pdfs_src_10_21_38[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (8) & 1) * (_data_pdfs_src_10_21_38[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]); // Record value 2 (slot 8): gated by mask bit 8; source cell is (x-1, y-1, z) relative to the mask cell.
+      double * RESTRICT _data_pdfs_src_1m1_2m1_311 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 - _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 - _stride_pdfs_src_2 + 11*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_10_2m1_311 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 - _stride_pdfs_src_2 + 11*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_1m1_20_311 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 - _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_10_20_311 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3;
+      _data_buffer[5*((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + 5*((_size_mask_0) / (2))*((ctr_1) / (2)) + 5*((ctr_0) / (2)) + 3] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (11) & 1) * (_data_pdfs_src_1m1_2m1_311[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (11) & 1) * (_data_pdfs_src_1m1_2m1_311[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (11) & 1) * (_data_pdfs_src_1m1_20_311[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (11) & 1) * (_data_pdfs_src_1m1_20_311[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (11) & 1) * (_data_pdfs_src_10_2m1_311[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (11) & 1) * (_data_pdfs_src_10_2m1_311[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (11) & 1) * (_data_pdfs_src_10_20_311[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (11) & 1) * (_data_pdfs_src_10_20_311[_stride_pdfs_src_0*ctr_0]); // Record value 3 (slot 11): gated by mask bit 11; source cell is (x, y-1, z-1) relative to the mask cell.
+      double * RESTRICT _data_pdfs_src_1m1_21_315 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 - _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 15*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_10_21_315 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 15*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_1m1_22_315 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 - _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_2 + 15*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_10_22_315 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_2 + 15*_stride_pdfs_src_3;
+      _data_buffer[5*((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + 5*((_size_mask_0) / (2))*((ctr_1) / (2)) + 5*((ctr_0) / (2)) + 4] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (15) & 1) * (_data_pdfs_src_1m1_21_315[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (15) & 1) * (_data_pdfs_src_1m1_21_315[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (15) & 1) * (_data_pdfs_src_1m1_22_315[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (15) & 1) * (_data_pdfs_src_1m1_22_315[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (15) & 1) * (_data_pdfs_src_10_21_315[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (15) & 1) * (_data_pdfs_src_10_21_315[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (15) & 1) * (_data_pdfs_src_10_22_315[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (15) & 1) * (_data_pdfs_src_10_22_315[_stride_pdfs_src_0*ctr_0]); // Record value 4 (slot 15): gated by mask bit 15; source cell is (x, y-1, z+1) relative to the mask cell.
+   } // NOTE(review): pdfs_src reads span ctr-1 .. ctr+2 along x and z and start at ctr_1-1 along y, and mask reads reach ctr+1, while the guard only checks ctr < _size_mask_*; presumably the fields carry ghost layers - confirm.
+}
+} // namespace internal_storagespecification_packPartialCoalescence_N
+
+namespace internal_storagespecification_packPartialCoalescence_BE { // Kernel that packs 1 value per 2x2x2 cell group into _data_buffer: a mask-gated sum of 8 slot-18 pdfs_src values.
+static FUNC_PREFIX void storagespecification_packPartialCoalescence_BE(double * RESTRICT  _data_buffer, uint32_t * RESTRICT const _data_mask, double * RESTRICT const _data_pdfs_src, int64_t const _size_mask_0, int64_t const _size_mask_1, int64_t const _size_mask_2, int64_t const _stride_mask_0, int64_t const _stride_mask_1, int64_t const _stride_mask_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{ // Writes _data_buffer; reads _data_mask (one uint32_t per cell, addressed by strides 0..2) and _data_pdfs_src (strides 0..2 address the cell, stride 3 selects the value slot).
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_mask_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_mask_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_mask_2)
+   { // Reached only by threads whose global x, y and z indices are all even and below _size_mask_0/1/2.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // x index of this thread's group origin
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // y index of this thread's group origin
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // z index of this thread's group origin
+      uint32_t * RESTRICT _data_mask_10_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2; // Mask row pointers _data_mask_1a_2b address y = ctr_1 + a, z = ctr_2 + b.
+      double * RESTRICT _data_pdfs_src_10_21_318 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 18*_stride_pdfs_src_3; // Source row pointers all select slot 18 and sit one z step above the mask row they are paired with.
+      uint32_t * RESTRICT _data_mask_11_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2;
+      double * RESTRICT _data_pdfs_src_11_21_318 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 18*_stride_pdfs_src_3;
+      uint32_t * RESTRICT _data_mask_10_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2 + _stride_mask_2;
+      double * RESTRICT _data_pdfs_src_10_22_318 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_2 + 18*_stride_pdfs_src_3;
+      uint32_t * RESTRICT _data_mask_11_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2 + _stride_mask_2;
+      double * RESTRICT _data_pdfs_src_11_22_318 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_2 + 18*_stride_pdfs_src_3;
+      _data_buffer[((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + ((_size_mask_0) / (2))*((ctr_1) / (2)) + ((ctr_0) / (2))] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (18) & 1) * (_data_pdfs_src_10_21_318[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (18) & 1) * (_data_pdfs_src_10_21_318[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (18) & 1) * (_data_pdfs_src_10_22_318[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (18) & 1) * (_data_pdfs_src_10_22_318[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (18) & 1) * (_data_pdfs_src_11_21_318[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (18) & 1) * (_data_pdfs_src_11_21_318[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (18) & 1) * (_data_pdfs_src_11_22_318[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (18) & 1) * (_data_pdfs_src_11_22_318[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]); // Sum over the 8 group cells of (bit 18 of the cell's mask, 0 or 1) * (slot-18 value at (x-1, y, z+1) relative to that cell); the buffer index is the group index linearised with extents _size_mask_0/2 and _size_mask_1/2.
+   } // NOTE(review): pdfs_src reads reach ctr_0-1, ctr_1+1 and ctr_2+2, and mask reads reach ctr+1, while the guard only checks ctr < _size_mask_*; presumably the fields carry ghost layers - confirm.
+}
+} // namespace internal_storagespecification_packPartialCoalescence_BE
+
+namespace internal_storagespecification_packPartialCoalescence_BN { // GPU kernel (indexes via blockDim/blockIdx/threadIdx) that packs mask-gated sums of PDF entry 15 into _data_buffer; "BN" presumably names the lattice direction.
+static FUNC_PREFIX void storagespecification_packPartialCoalescence_BN(double * RESTRICT  _data_buffer, uint32_t * RESTRICT const _data_mask, double * RESTRICT const _data_pdfs_src, int64_t const _size_mask_0, int64_t const _size_mask_1, int64_t const _size_mask_2, int64_t const _stride_mask_0, int64_t const _stride_mask_1, int64_t const _stride_mask_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{ // Each active thread handles one 2x2x2 group of mask cells and writes a single double to _data_buffer.
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_mask_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_mask_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_mask_2)
+   { // Active only when the global x/y/z thread indices are all even and below _size_mask_0/1/2. NOTE(review): mask is read at index+1 in every dimension, so the extents are presumably even or padded - confirm.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index along dimension 0
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index along dimension 1
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index along dimension 2
+      uint32_t * RESTRICT _data_mask_10_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2; // mask base at (ctr_1, ctr_2)
+      double * RESTRICT _data_pdfs_src_1m1_21_315 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 - _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 15*_stride_pdfs_src_3; // PDF base at (ctr_1-1, ctr_2+1), entry 15
+      uint32_t * RESTRICT _data_mask_11_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2; // mask base at (ctr_1+1, ctr_2)
+      double * RESTRICT _data_pdfs_src_10_21_315 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 15*_stride_pdfs_src_3; // PDF base at (ctr_1, ctr_2+1), entry 15
+      uint32_t * RESTRICT _data_mask_10_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2 + _stride_mask_2; // mask base at (ctr_1, ctr_2+1)
+      double * RESTRICT _data_pdfs_src_1m1_22_315 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 - _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_2 + 15*_stride_pdfs_src_3; // PDF base at (ctr_1-1, ctr_2+2), entry 15
+      uint32_t * RESTRICT _data_mask_11_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2 + _stride_mask_2; // mask base at (ctr_1+1, ctr_2+1)
+      double * RESTRICT _data_pdfs_src_10_22_315 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_2 + 15*_stride_pdfs_src_3; // PDF base at (ctr_1, ctr_2+2), entry 15
+      _data_buffer[((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + ((_size_mask_0) / (2))*((ctr_1) / (2)) + ((ctr_0) / (2))] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (15) & 1) * (_data_pdfs_src_1m1_21_315[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (15) & 1) * (_data_pdfs_src_1m1_21_315[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (15) & 1) * (_data_pdfs_src_1m1_22_315[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (15) & 1) * (_data_pdfs_src_1m1_22_315[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (15) & 1) * (_data_pdfs_src_10_21_315[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (15) & 1) * (_data_pdfs_src_10_21_315[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (15) & 1) * (_data_pdfs_src_10_22_315[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (15) & 1) * (_data_pdfs_src_10_22_315[_stride_pdfs_src_0*ctr_0]); // Slot is linearized from the halved counters (dimension 0 fastest); every term is bit 15 of a mask cell times the PDF at the same dimension-0 index as that mask cell.
+   } 
+}
+}
+
+namespace internal_storagespecification_packPartialCoalescence_NE { // GPU kernel (indexes via blockDim/blockIdx/threadIdx) that packs mask-gated sums of PDF entry 8 into _data_buffer; "NE" presumably names the lattice direction.
+static FUNC_PREFIX void storagespecification_packPartialCoalescence_NE(double * RESTRICT  _data_buffer, uint32_t * RESTRICT const _data_mask, double * RESTRICT const _data_pdfs_src, int64_t const _size_mask_0, int64_t const _size_mask_1, int64_t const _size_mask_2, int64_t const _stride_mask_0, int64_t const _stride_mask_1, int64_t const _stride_mask_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{ // Each active thread handles one 2x2x2 group of mask cells and writes a single double to _data_buffer.
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_mask_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_mask_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_mask_2)
+   { // Active only when the global x/y/z thread indices are all even and below _size_mask_0/1/2. NOTE(review): mask is read at index+1 in every dimension, so the extents are presumably even or padded - confirm.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index along dimension 0
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index along dimension 1
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index along dimension 2
+      uint32_t * RESTRICT _data_mask_10_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2; // mask base at (ctr_1, ctr_2)
+      double * RESTRICT _data_pdfs_src_1m1_20_38 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 - _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3; // PDF base at (ctr_1-1, ctr_2), entry 8
+      uint32_t * RESTRICT _data_mask_11_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2; // mask base at (ctr_1+1, ctr_2)
+      double * RESTRICT _data_pdfs_src_10_20_38 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3; // PDF base at (ctr_1, ctr_2), entry 8
+      uint32_t * RESTRICT _data_mask_10_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2 + _stride_mask_2; // mask base at (ctr_1, ctr_2+1)
+      double * RESTRICT _data_pdfs_src_1m1_21_38 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 - _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 8*_stride_pdfs_src_3; // PDF base at (ctr_1-1, ctr_2+1), entry 8
+      uint32_t * RESTRICT _data_mask_11_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2 + _stride_mask_2; // mask base at (ctr_1+1, ctr_2+1)
+      double * RESTRICT _data_pdfs_src_10_21_38 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 8*_stride_pdfs_src_3; // PDF base at (ctr_1, ctr_2+1), entry 8
+      _data_buffer[((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + ((_size_mask_0) / (2))*((ctr_1) / (2)) + ((ctr_0) / (2))] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (8) & 1) * (_data_pdfs_src_1m1_20_38[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (8) & 1) * (_data_pdfs_src_1m1_20_38[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (8) & 1) * (_data_pdfs_src_1m1_21_38[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (8) & 1) * (_data_pdfs_src_1m1_21_38[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (8) & 1) * (_data_pdfs_src_10_20_38[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (8) & 1) * (_data_pdfs_src_10_20_38[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (8) & 1) * (_data_pdfs_src_10_21_38[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (8) & 1) * (_data_pdfs_src_10_21_38[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]); // Slot is linearized from the halved counters (dimension 0 fastest); every term is bit 8 of a mask cell times the PDF one step lower in dimension 0 than that mask cell.
+   } 
+}
+}
+
+namespace internal_storagespecification_packPartialCoalescence_TE { // GPU kernel (indexes via blockDim/blockIdx/threadIdx) that packs mask-gated sums of PDF entry 14 into _data_buffer; "TE" presumably names the lattice direction.
+static FUNC_PREFIX void storagespecification_packPartialCoalescence_TE(double * RESTRICT  _data_buffer, uint32_t * RESTRICT const _data_mask, double * RESTRICT const _data_pdfs_src, int64_t const _size_mask_0, int64_t const _size_mask_1, int64_t const _size_mask_2, int64_t const _stride_mask_0, int64_t const _stride_mask_1, int64_t const _stride_mask_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{ // Each active thread handles one 2x2x2 group of mask cells and writes a single double to _data_buffer.
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_mask_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_mask_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_mask_2)
+   { // Active only when the global x/y/z thread indices are all even and below _size_mask_0/1/2. NOTE(review): mask is read at index+1 in every dimension, so the extents are presumably even or padded - confirm.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index along dimension 0
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index along dimension 1
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index along dimension 2
+      uint32_t * RESTRICT _data_mask_10_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2; // mask base at (ctr_1, ctr_2)
+      double * RESTRICT _data_pdfs_src_10_2m1_314 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 - _stride_pdfs_src_2 + 14*_stride_pdfs_src_3; // PDF base at (ctr_1, ctr_2-1), entry 14
+      uint32_t * RESTRICT _data_mask_11_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2; // mask base at (ctr_1+1, ctr_2)
+      double * RESTRICT _data_pdfs_src_11_2m1_314 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 - _stride_pdfs_src_2 + 14*_stride_pdfs_src_3; // PDF base at (ctr_1+1, ctr_2-1), entry 14
+      uint32_t * RESTRICT _data_mask_10_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2 + _stride_mask_2; // mask base at (ctr_1, ctr_2+1)
+      double * RESTRICT _data_pdfs_src_10_20_314 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3; // PDF base at (ctr_1, ctr_2), entry 14
+      uint32_t * RESTRICT _data_mask_11_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2 + _stride_mask_2; // mask base at (ctr_1+1, ctr_2+1)
+      double * RESTRICT _data_pdfs_src_11_20_314 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3; // PDF base at (ctr_1+1, ctr_2), entry 14
+      _data_buffer[((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + ((_size_mask_0) / (2))*((ctr_1) / (2)) + ((ctr_0) / (2))] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (14) & 1) * (_data_pdfs_src_10_2m1_314[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (14) & 1) * (_data_pdfs_src_10_2m1_314[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (14) & 1) * (_data_pdfs_src_10_20_314[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (14) & 1) * (_data_pdfs_src_10_20_314[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (14) & 1) * (_data_pdfs_src_11_2m1_314[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (14) & 1) * (_data_pdfs_src_11_2m1_314[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (14) & 1) * (_data_pdfs_src_11_20_314[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (14) & 1) * (_data_pdfs_src_11_20_314[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]); // Slot is linearized from the halved counters (dimension 0 fastest); every term is bit 14 of a mask cell times the PDF one step lower in dimension 0 than that mask cell.
+   } 
+}
+}
+
+namespace internal_storagespecification_packPartialCoalescence_TW { // GPU kernel (indexes via blockDim/blockIdx/threadIdx) that packs mask-gated sums of PDF entry 13 into _data_buffer; "TW" presumably names the lattice direction.
+static FUNC_PREFIX void storagespecification_packPartialCoalescence_TW(double * RESTRICT  _data_buffer, uint32_t * RESTRICT const _data_mask, double * RESTRICT const _data_pdfs_src, int64_t const _size_mask_0, int64_t const _size_mask_1, int64_t const _size_mask_2, int64_t const _stride_mask_0, int64_t const _stride_mask_1, int64_t const _stride_mask_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{ // Each active thread handles one 2x2x2 group of mask cells and writes a single double to _data_buffer.
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_mask_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_mask_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_mask_2)
+   { // Active only when the global x/y/z thread indices are all even and below _size_mask_0/1/2. NOTE(review): mask is read at index+1 in every dimension, so the extents are presumably even or padded - confirm.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index along dimension 0
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index along dimension 1
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index along dimension 2
+      uint32_t * RESTRICT _data_mask_10_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2; // mask base at (ctr_1, ctr_2)
+      double * RESTRICT _data_pdfs_src_10_2m1_313 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 - _stride_pdfs_src_2 + 13*_stride_pdfs_src_3; // PDF base at (ctr_1, ctr_2-1), entry 13
+      uint32_t * RESTRICT _data_mask_11_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2; // mask base at (ctr_1+1, ctr_2)
+      double * RESTRICT _data_pdfs_src_11_2m1_313 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 - _stride_pdfs_src_2 + 13*_stride_pdfs_src_3; // PDF base at (ctr_1+1, ctr_2-1), entry 13
+      uint32_t * RESTRICT _data_mask_10_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2 + _stride_mask_2; // mask base at (ctr_1, ctr_2+1)
+      double * RESTRICT _data_pdfs_src_10_20_313 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3; // PDF base at (ctr_1, ctr_2), entry 13
+      uint32_t * RESTRICT _data_mask_11_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2 + _stride_mask_2; // mask base at (ctr_1+1, ctr_2+1)
+      double * RESTRICT _data_pdfs_src_11_20_313 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3; // PDF base at (ctr_1+1, ctr_2), entry 13
+      _data_buffer[((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + ((_size_mask_0) / (2))*((ctr_1) / (2)) + ((ctr_0) / (2))] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (13) & 1) * (_data_pdfs_src_10_2m1_313[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (13) & 1) * (_data_pdfs_src_10_2m1_313[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (13) & 1) * (_data_pdfs_src_10_20_313[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (13) & 1) * (_data_pdfs_src_10_20_313[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (13) & 1) * (_data_pdfs_src_11_2m1_313[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (13) & 1) * (_data_pdfs_src_11_2m1_313[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (13) & 1) * (_data_pdfs_src_11_20_313[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (13) & 1) * (_data_pdfs_src_11_20_313[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]); // Slot is linearized from the halved counters (dimension 0 fastest); every term is bit 13 of a mask cell times the PDF one step higher in dimension 0 than that mask cell.
+   } 
+}
+}
+
+namespace internal_storagespecification_packPartialCoalescence_SE { // GPU kernel (indexes via blockDim/blockIdx/threadIdx) that packs mask-gated sums of PDF entry 10 into _data_buffer; "SE" presumably names the lattice direction.
+static FUNC_PREFIX void storagespecification_packPartialCoalescence_SE(double * RESTRICT  _data_buffer, uint32_t * RESTRICT const _data_mask, double * RESTRICT const _data_pdfs_src, int64_t const _size_mask_0, int64_t const _size_mask_1, int64_t const _size_mask_2, int64_t const _stride_mask_0, int64_t const _stride_mask_1, int64_t const _stride_mask_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{ // Each active thread handles one 2x2x2 group of mask cells and writes a single double to _data_buffer.
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_mask_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_mask_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_mask_2)
+   { // Active only when the global x/y/z thread indices are all even and below _size_mask_0/1/2. NOTE(review): mask is read at index+1 in every dimension, so the extents are presumably even or padded - confirm.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index along dimension 0
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index along dimension 1
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index along dimension 2
+      uint32_t * RESTRICT _data_mask_10_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2; // mask base at (ctr_1, ctr_2)
+      double * RESTRICT _data_pdfs_src_11_20_310 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3; // PDF base at (ctr_1+1, ctr_2), entry 10
+      uint32_t * RESTRICT _data_mask_11_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2; // mask base at (ctr_1+1, ctr_2)
+      double * RESTRICT _data_pdfs_src_12_20_310 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + 2*_stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3; // PDF base at (ctr_1+2, ctr_2), entry 10
+      uint32_t * RESTRICT _data_mask_10_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2 + _stride_mask_2; // mask base at (ctr_1, ctr_2+1)
+      double * RESTRICT _data_pdfs_src_11_21_310 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 10*_stride_pdfs_src_3; // PDF base at (ctr_1+1, ctr_2+1), entry 10
+      uint32_t * RESTRICT _data_mask_11_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2 + _stride_mask_2; // mask base at (ctr_1+1, ctr_2+1)
+      double * RESTRICT _data_pdfs_src_12_21_310 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + 2*_stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 10*_stride_pdfs_src_3; // PDF base at (ctr_1+2, ctr_2+1), entry 10
+      _data_buffer[((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + ((_size_mask_0) / (2))*((ctr_1) / (2)) + ((ctr_0) / (2))] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (10) & 1) * (_data_pdfs_src_11_20_310[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (10) & 1) * (_data_pdfs_src_11_20_310[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (10) & 1) * (_data_pdfs_src_11_21_310[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (10) & 1) * (_data_pdfs_src_11_21_310[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (10) & 1) * (_data_pdfs_src_12_20_310[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (10) & 1) * (_data_pdfs_src_12_20_310[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (10) & 1) * (_data_pdfs_src_12_21_310[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (10) & 1) * (_data_pdfs_src_12_21_310[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]); // Slot is linearized from the halved counters (dimension 0 fastest); every term is bit 10 of a mask cell times the PDF one step lower in dimension 0 than that mask cell.
+   } 
+}
+}
+
+namespace internal_storagespecification_packPartialCoalescence_TN { // GPU kernel (indexes via blockDim/blockIdx/threadIdx) that packs mask-gated sums of PDF entry 11 into _data_buffer; "TN" presumably names the lattice direction.
+static FUNC_PREFIX void storagespecification_packPartialCoalescence_TN(double * RESTRICT  _data_buffer, uint32_t * RESTRICT const _data_mask, double * RESTRICT const _data_pdfs_src, int64_t const _size_mask_0, int64_t const _size_mask_1, int64_t const _size_mask_2, int64_t const _stride_mask_0, int64_t const _stride_mask_1, int64_t const _stride_mask_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{ // Each active thread handles one 2x2x2 group of mask cells and writes a single double to _data_buffer.
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_mask_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_mask_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_mask_2)
+   { // Active only when the global x/y/z thread indices are all even and below _size_mask_0/1/2. NOTE(review): mask is read at index+1 in every dimension, so the extents are presumably even or padded - confirm.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index along dimension 0
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index along dimension 1
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index along dimension 2
+      uint32_t * RESTRICT _data_mask_10_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2; // mask base at (ctr_1, ctr_2)
+      double * RESTRICT _data_pdfs_src_1m1_2m1_311 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 - _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 - _stride_pdfs_src_2 + 11*_stride_pdfs_src_3; // PDF base at (ctr_1-1, ctr_2-1), entry 11
+      uint32_t * RESTRICT _data_mask_11_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2; // mask base at (ctr_1+1, ctr_2)
+      double * RESTRICT _data_pdfs_src_10_2m1_311 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 - _stride_pdfs_src_2 + 11*_stride_pdfs_src_3; // PDF base at (ctr_1, ctr_2-1), entry 11
+      uint32_t * RESTRICT _data_mask_10_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2 + _stride_mask_2; // mask base at (ctr_1, ctr_2+1)
+      double * RESTRICT _data_pdfs_src_1m1_20_311 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 - _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3; // PDF base at (ctr_1-1, ctr_2), entry 11
+      uint32_t * RESTRICT _data_mask_11_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2 + _stride_mask_2; // mask base at (ctr_1+1, ctr_2+1)
+      double * RESTRICT _data_pdfs_src_10_20_311 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3; // PDF base at (ctr_1, ctr_2), entry 11
+      _data_buffer[((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + ((_size_mask_0) / (2))*((ctr_1) / (2)) + ((ctr_0) / (2))] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (11) & 1) * (_data_pdfs_src_1m1_2m1_311[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (11) & 1) * (_data_pdfs_src_1m1_2m1_311[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (11) & 1) * (_data_pdfs_src_1m1_20_311[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (11) & 1) * (_data_pdfs_src_1m1_20_311[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (11) & 1) * (_data_pdfs_src_10_2m1_311[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (11) & 1) * (_data_pdfs_src_10_2m1_311[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (11) & 1) * (_data_pdfs_src_10_20_311[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (11) & 1) * (_data_pdfs_src_10_20_311[_stride_pdfs_src_0*ctr_0]); // Slot is linearized from the halved counters (dimension 0 fastest); every term is bit 11 of a mask cell times the PDF at the same dimension-0 index as that mask cell.
+   } 
+}
+}
+
+namespace internal_storagespecification_packPartialCoalescence_BW { // GPU kernel (indexes via blockDim/blockIdx/threadIdx) that packs mask-gated sums of PDF entry 17 into _data_buffer; "BW" presumably names the lattice direction.
+static FUNC_PREFIX void storagespecification_packPartialCoalescence_BW(double * RESTRICT  _data_buffer, uint32_t * RESTRICT const _data_mask, double * RESTRICT const _data_pdfs_src, int64_t const _size_mask_0, int64_t const _size_mask_1, int64_t const _size_mask_2, int64_t const _stride_mask_0, int64_t const _stride_mask_1, int64_t const _stride_mask_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{ // Each active thread handles one 2x2x2 group of mask cells and writes a single double to _data_buffer.
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_mask_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_mask_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_mask_2)
+   { // Active only when the global x/y/z thread indices are all even and below _size_mask_0/1/2. NOTE(review): mask is read at index+1 in every dimension, so the extents are presumably even or padded - confirm.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index along dimension 0
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index along dimension 1
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index along dimension 2
+      uint32_t * RESTRICT _data_mask_10_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2; // mask base at (ctr_1, ctr_2)
+      double * RESTRICT _data_pdfs_src_10_21_317 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 17*_stride_pdfs_src_3; // PDF base at (ctr_1, ctr_2+1), entry 17
+      uint32_t * RESTRICT _data_mask_11_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2; // mask base at (ctr_1+1, ctr_2)
+      double * RESTRICT _data_pdfs_src_11_21_317 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 17*_stride_pdfs_src_3; // PDF base at (ctr_1+1, ctr_2+1), entry 17
+      uint32_t * RESTRICT _data_mask_10_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2 + _stride_mask_2; // mask base at (ctr_1, ctr_2+1)
+      double * RESTRICT _data_pdfs_src_10_22_317 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_2 + 17*_stride_pdfs_src_3; // PDF base at (ctr_1, ctr_2+2), entry 17
+      uint32_t * RESTRICT _data_mask_11_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2 + _stride_mask_2; // mask base at (ctr_1+1, ctr_2+1)
+      double * RESTRICT _data_pdfs_src_11_22_317 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_2 + 17*_stride_pdfs_src_3; // PDF base at (ctr_1+1, ctr_2+2), entry 17
+      _data_buffer[((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + ((_size_mask_0) / (2))*((ctr_1) / (2)) + ((ctr_0) / (2))] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (17) & 1) * (_data_pdfs_src_10_21_317[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (17) & 1) * (_data_pdfs_src_10_21_317[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (17) & 1) * (_data_pdfs_src_10_22_317[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (17) & 1) * (_data_pdfs_src_10_22_317[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (17) & 1) * (_data_pdfs_src_11_21_317[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (17) & 1) * (_data_pdfs_src_11_21_317[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (17) & 1) * (_data_pdfs_src_11_22_317[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (17) & 1) * (_data_pdfs_src_11_22_317[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]); // Slot is linearized from the halved counters (dimension 0 fastest); every term is bit 17 of a mask cell times the PDF one step higher in dimension 0 than that mask cell.
+   } 
+}
+}
+
+namespace internal_storagespecification_packPartialCoalescence_SW { // GPU kernel (indexes via blockDim/blockIdx/threadIdx) that packs mask-gated sums of PDF entry 9 into _data_buffer; "SW" presumably names the lattice direction.
+static FUNC_PREFIX void storagespecification_packPartialCoalescence_SW(double * RESTRICT  _data_buffer, uint32_t * RESTRICT const _data_mask, double * RESTRICT const _data_pdfs_src, int64_t const _size_mask_0, int64_t const _size_mask_1, int64_t const _size_mask_2, int64_t const _stride_mask_0, int64_t const _stride_mask_1, int64_t const _stride_mask_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{ // Each active thread handles one 2x2x2 group of mask cells and writes a single double to _data_buffer.
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_mask_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_mask_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_mask_2)
+   { // Active only when the global x/y/z thread indices are all even and below _size_mask_0/1/2. NOTE(review): mask is read at index+1 in every dimension, so the extents are presumably even or padded - confirm.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index along dimension 0
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index along dimension 1
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index along dimension 2
+      uint32_t * RESTRICT _data_mask_10_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2; // mask base at (ctr_1, ctr_2)
+      double * RESTRICT _data_pdfs_src_11_20_39 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3; // PDF base at (ctr_1+1, ctr_2), entry 9
+      uint32_t * RESTRICT _data_mask_11_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2; // mask base at (ctr_1+1, ctr_2)
+      double * RESTRICT _data_pdfs_src_12_20_39 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + 2*_stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3; // PDF base at (ctr_1+2, ctr_2), entry 9
+      uint32_t * RESTRICT _data_mask_10_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2 + _stride_mask_2; // mask base at (ctr_1, ctr_2+1)
+      double * RESTRICT _data_pdfs_src_11_21_39 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 9*_stride_pdfs_src_3; // PDF base at (ctr_1+1, ctr_2+1), entry 9
+      uint32_t * RESTRICT _data_mask_11_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2 + _stride_mask_2; // mask base at (ctr_1+1, ctr_2+1)
+      double * RESTRICT _data_pdfs_src_12_21_39 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + 2*_stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 9*_stride_pdfs_src_3; // PDF base at (ctr_1+2, ctr_2+1), entry 9
+      _data_buffer[((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + ((_size_mask_0) / (2))*((ctr_1) / (2)) + ((ctr_0) / (2))] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (9) & 1) * (_data_pdfs_src_11_20_39[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (9) & 1) * (_data_pdfs_src_11_20_39[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (9) & 1) * (_data_pdfs_src_11_21_39[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (9) & 1) * (_data_pdfs_src_11_21_39[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (9) & 1) * (_data_pdfs_src_12_20_39[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (9) & 1) * (_data_pdfs_src_12_20_39[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (9) & 1) * (_data_pdfs_src_12_21_39[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (9) & 1) * (_data_pdfs_src_12_21_39[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]); // Slot is linearized from the halved counters (dimension 0 fastest); every term is bit 9 of a mask cell times the PDF one step higher in dimension 0 than that mask cell.
+   } 
+}
+}
+
+namespace internal_storagespecification_packPartialCoalescence_S {
+static FUNC_PREFIX void storagespecification_packPartialCoalescence_S(double * RESTRICT  _data_buffer, uint32_t * RESTRICT const _data_mask, double * RESTRICT const _data_pdfs_src, int64_t const _size_mask_0, int64_t const _size_mask_1, int64_t const _size_mask_2, int64_t const _stride_mask_0, int64_t const _stride_mask_1, int64_t const _stride_mask_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_mask_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_mask_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_mask_2)
+   {
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
+      uint32_t * RESTRICT _data_mask_10_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2;
+      double * RESTRICT _data_pdfs_src_11_20_32 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_3;
+      uint32_t * RESTRICT _data_mask_11_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2;
+      double * RESTRICT _data_pdfs_src_12_20_32 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + 2*_stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_3;
+      uint32_t * RESTRICT _data_mask_10_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2 + _stride_mask_2;
+      double * RESTRICT _data_pdfs_src_11_21_32 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 2*_stride_pdfs_src_3;
+      uint32_t * RESTRICT _data_mask_11_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2 + _stride_mask_2;
+      double * RESTRICT _data_pdfs_src_12_21_32 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + 2*_stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 2*_stride_pdfs_src_3;
+      _data_buffer[5*((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + 5*((_size_mask_0) / (2))*((ctr_1) / (2)) + 5*((ctr_0) / (2))] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (2) & 1) * (_data_pdfs_src_11_20_32[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (2) & 1) * (_data_pdfs_src_11_20_32[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (2) & 1) * (_data_pdfs_src_11_21_32[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (2) & 1) * (_data_pdfs_src_11_21_32[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (2) & 1) * (_data_pdfs_src_12_20_32[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (2) & 1) * (_data_pdfs_src_12_20_32[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (2) & 1) * (_data_pdfs_src_12_21_32[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (2) & 1) * (_data_pdfs_src_12_21_32[_stride_pdfs_src_0*ctr_0]);
+      double * RESTRICT _data_pdfs_src_11_20_39 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_12_20_39 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + 2*_stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_11_21_39 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 9*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_12_21_39 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + 2*_stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 9*_stride_pdfs_src_3;
+      _data_buffer[5*((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + 5*((_size_mask_0) / (2))*((ctr_1) / (2)) + 5*((ctr_0) / (2)) + 1] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (9) & 1) * (_data_pdfs_src_11_20_39[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (9) & 1) * (_data_pdfs_src_11_20_39[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (9) & 1) * (_data_pdfs_src_11_21_39[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (9) & 1) * (_data_pdfs_src_11_21_39[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (9) & 1) * (_data_pdfs_src_12_20_39[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (9) & 1) * (_data_pdfs_src_12_20_39[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (9) & 1) * (_data_pdfs_src_12_21_39[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (9) & 1) * (_data_pdfs_src_12_21_39[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]);
+      double * RESTRICT _data_pdfs_src_11_20_310 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_12_20_310 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + 2*_stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_11_21_310 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 10*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_12_21_310 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + 2*_stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 10*_stride_pdfs_src_3;
+      _data_buffer[5*((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + 5*((_size_mask_0) / (2))*((ctr_1) / (2)) + 5*((ctr_0) / (2)) + 2] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (10) & 1) * (_data_pdfs_src_11_20_310[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (10) & 1) * (_data_pdfs_src_11_20_310[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (10) & 1) * (_data_pdfs_src_11_21_310[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (10) & 1) * (_data_pdfs_src_11_21_310[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (10) & 1) * (_data_pdfs_src_12_20_310[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (10) & 1) * (_data_pdfs_src_12_20_310[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (10) & 1) * (_data_pdfs_src_12_21_310[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (10) & 1) * (_data_pdfs_src_12_21_310[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]);
+      double * RESTRICT _data_pdfs_src_11_2m1_312 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 - _stride_pdfs_src_2 + 12*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_12_2m1_312 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + 2*_stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 - _stride_pdfs_src_2 + 12*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_11_20_312 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_12_20_312 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + 2*_stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3;
+      _data_buffer[5*((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + 5*((_size_mask_0) / (2))*((ctr_1) / (2)) + 5*((ctr_0) / (2)) + 3] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (12) & 1) * (_data_pdfs_src_11_2m1_312[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (12) & 1) * (_data_pdfs_src_11_2m1_312[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (12) & 1) * (_data_pdfs_src_11_20_312[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (12) & 1) * (_data_pdfs_src_11_20_312[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (12) & 1) * (_data_pdfs_src_12_2m1_312[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (12) & 1) * (_data_pdfs_src_12_2m1_312[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (12) & 1) * (_data_pdfs_src_12_20_312[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (12) & 1) * (_data_pdfs_src_12_20_312[_stride_pdfs_src_0*ctr_0]);
+      double * RESTRICT _data_pdfs_src_11_21_316 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 16*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_12_21_316 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + 2*_stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 16*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_11_22_316 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_2 + 16*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_12_22_316 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + 2*_stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_2 + 16*_stride_pdfs_src_3;
+      _data_buffer[5*((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + 5*((_size_mask_0) / (2))*((ctr_1) / (2)) + 5*((ctr_0) / (2)) + 4] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (16) & 1) * (_data_pdfs_src_11_21_316[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (16) & 1) * (_data_pdfs_src_11_21_316[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (16) & 1) * (_data_pdfs_src_11_22_316[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (16) & 1) * (_data_pdfs_src_11_22_316[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (16) & 1) * (_data_pdfs_src_12_21_316[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (16) & 1) * (_data_pdfs_src_12_21_316[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (16) & 1) * (_data_pdfs_src_12_22_316[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (16) & 1) * (_data_pdfs_src_12_22_316[_stride_pdfs_src_0*ctr_0]);
+   } 
+}
+}
+
+namespace internal_storagespecification_packPartialCoalescence_TS { // Pack kernel "TS": reduces each 2x2x2 group of cells to one double in _data_buffer, using 4th-axis index 12 of pdfs_src.
+static FUNC_PREFIX void storagespecification_packPartialCoalescence_TS(double * RESTRICT  _data_buffer, uint32_t * RESTRICT const _data_mask, double * RESTRICT const _data_pdfs_src, int64_t const _size_mask_0, int64_t const _size_mask_1, int64_t const _size_mask_2, int64_t const _stride_mask_0, int64_t const _stride_mask_1, int64_t const _stride_mask_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{ // Stores, at buffer index (size0/2)*(size1/2)*(z/2) + (size0/2)*(y/2) + x/2 (integer division), the sum over the 8 cells of ((mask >> 12) & 1) * pdf; the terms are added left to right. Reads _data_mask and _data_pdfs_src, writes only _data_buffer.
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_mask_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_mask_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_mask_2)
+   { // Only threads whose global x, y and z indices are all even and below _size_mask_0/1/2 do work. NOTE(review): the body also reads index+1 of the mask in every dimension, which looks like it assumes even _size_mask_* - confirm with the caller.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index along x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index along y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index along z
+      uint32_t * RESTRICT _data_mask_10_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2; // mask row (y, z)
+      double * RESTRICT _data_pdfs_src_11_2m1_312 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 - _stride_pdfs_src_2 + 12*_stride_pdfs_src_3; // pdfs row (y+1, z-1), index 12; paired with mask row (y, z). NOTE(review): z-1 and y+2 are read, presumably from ghost layers - confirm.
+      uint32_t * RESTRICT _data_mask_11_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2; // mask row (y+1, z)
+      double * RESTRICT _data_pdfs_src_12_2m1_312 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + 2*_stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 - _stride_pdfs_src_2 + 12*_stride_pdfs_src_3; // pdfs row (y+2, z-1), index 12; paired with mask row (y+1, z)
+      uint32_t * RESTRICT _data_mask_10_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2 + _stride_mask_2; // mask row (y, z+1)
+      double * RESTRICT _data_pdfs_src_11_20_312 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3; // pdfs row (y+1, z), index 12; paired with mask row (y, z+1)
+      uint32_t * RESTRICT _data_mask_11_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2 + _stride_mask_2; // mask row (y+1, z+1)
+      double * RESTRICT _data_pdfs_src_12_20_312 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + 2*_stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3; // pdfs row (y+2, z), index 12; paired with mask row (y+1, z+1)
+      _data_buffer[((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + ((_size_mask_0) / (2))*((ctr_1) / (2)) + ((ctr_0) / (2))] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (12) & 1) * (_data_pdfs_src_11_2m1_312[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (12) & 1) * (_data_pdfs_src_11_2m1_312[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (12) & 1) * (_data_pdfs_src_11_20_312[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (12) & 1) * (_data_pdfs_src_11_20_312[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (12) & 1) * (_data_pdfs_src_12_2m1_312[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (12) & 1) * (_data_pdfs_src_12_2m1_312[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (12) & 1) * (_data_pdfs_src_12_20_312[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (12) & 1) * (_data_pdfs_src_12_20_312[_stride_pdfs_src_0*ctr_0]);
+   } 
+}
+} // namespace internal_storagespecification_packPartialCoalescence_TS
+
+namespace internal_storagespecification_packPartialCoalescence_B { // Pack kernel "B": reduces each 2x2x2 group of cells to five doubles in _data_buffer, using 4th-axis indices 6, 15, 16, 17 and 18 of pdfs_src.
+static FUNC_PREFIX void storagespecification_packPartialCoalescence_B(double * RESTRICT  _data_buffer, uint32_t * RESTRICT const _data_mask, double * RESTRICT const _data_pdfs_src, int64_t const _size_mask_0, int64_t const _size_mask_1, int64_t const _size_mask_2, int64_t const _stride_mask_0, int64_t const _stride_mask_1, int64_t const _stride_mask_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{ // Buffer slot for a group is 5*((size0/2)*(size1/2)*(z/2) + (size0/2)*(y/2) + x/2) + k with k = 0..4 (integer division). Each slot holds the sum over the 8 cells of ((mask >> q) & 1) * pdf for one index q; the terms are added left to right.
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_mask_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_mask_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_mask_2)
+   { // Only threads whose global x, y and z indices are all even and below _size_mask_0/1/2 do work. NOTE(review): the body also reads index+1 of the mask in every dimension, which looks like it assumes even _size_mask_* - confirm with the caller.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index along x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index along y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index along z
+      uint32_t * RESTRICT _data_mask_10_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2; // mask row (y, z)
+      double * RESTRICT _data_pdfs_src_10_21_36 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 6*_stride_pdfs_src_3; // slot 0, index 6: pdfs row (y, z+1); pdfs are read at (x, y, z+1) relative to the mask cell
+      uint32_t * RESTRICT _data_mask_11_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2; // mask row (y+1, z)
+      double * RESTRICT _data_pdfs_src_11_21_36 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 6*_stride_pdfs_src_3; // pdfs row (y+1, z+1), index 6
+      uint32_t * RESTRICT _data_mask_10_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2 + _stride_mask_2; // mask row (y, z+1)
+      double * RESTRICT _data_pdfs_src_10_22_36 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_2 + 6*_stride_pdfs_src_3; // pdfs row (y, z+2), index 6
+      uint32_t * RESTRICT _data_mask_11_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2 + _stride_mask_2; // mask row (y+1, z+1)
+      double * RESTRICT _data_pdfs_src_11_22_36 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_2 + 6*_stride_pdfs_src_3; // pdfs row (y+1, z+2), index 6
+      _data_buffer[5*((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + 5*((_size_mask_0) / (2))*((ctr_1) / (2)) + 5*((ctr_0) / (2))] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (6) & 1) * (_data_pdfs_src_10_21_36[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (6) & 1) * (_data_pdfs_src_10_21_36[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (6) & 1) * (_data_pdfs_src_10_22_36[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (6) & 1) * (_data_pdfs_src_10_22_36[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (6) & 1) * (_data_pdfs_src_11_21_36[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (6) & 1) * (_data_pdfs_src_11_21_36[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (6) & 1) * (_data_pdfs_src_11_22_36[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (6) & 1) * (_data_pdfs_src_11_22_36[_stride_pdfs_src_0*ctr_0]);
+      double * RESTRICT _data_pdfs_src_1m1_21_315 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 - _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 15*_stride_pdfs_src_3; // slot 1, index 15: pdfs row (y-1, z+1); pdfs are read at (x, y-1, z+1) relative to the mask cell. NOTE(review): y-1 is read, presumably from a ghost layer - confirm.
+      double * RESTRICT _data_pdfs_src_10_21_315 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 15*_stride_pdfs_src_3; // pdfs row (y, z+1), index 15
+      double * RESTRICT _data_pdfs_src_1m1_22_315 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 - _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_2 + 15*_stride_pdfs_src_3; // pdfs row (y-1, z+2), index 15
+      double * RESTRICT _data_pdfs_src_10_22_315 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_2 + 15*_stride_pdfs_src_3; // pdfs row (y, z+2), index 15
+      _data_buffer[5*((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + 5*((_size_mask_0) / (2))*((ctr_1) / (2)) + 5*((ctr_0) / (2)) + 1] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (15) & 1) * (_data_pdfs_src_1m1_21_315[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (15) & 1) * (_data_pdfs_src_1m1_21_315[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (15) & 1) * (_data_pdfs_src_1m1_22_315[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (15) & 1) * (_data_pdfs_src_1m1_22_315[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (15) & 1) * (_data_pdfs_src_10_21_315[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (15) & 1) * (_data_pdfs_src_10_21_315[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (15) & 1) * (_data_pdfs_src_10_22_315[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (15) & 1) * (_data_pdfs_src_10_22_315[_stride_pdfs_src_0*ctr_0]);
+      double * RESTRICT _data_pdfs_src_11_21_316 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 16*_stride_pdfs_src_3; // slot 2, index 16: pdfs row (y+1, z+1); pdfs are read at (x, y+1, z+1) relative to the mask cell
+      double * RESTRICT _data_pdfs_src_12_21_316 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + 2*_stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 16*_stride_pdfs_src_3; // pdfs row (y+2, z+1), index 16
+      double * RESTRICT _data_pdfs_src_11_22_316 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_2 + 16*_stride_pdfs_src_3; // pdfs row (y+1, z+2), index 16
+      double * RESTRICT _data_pdfs_src_12_22_316 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + 2*_stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_2 + 16*_stride_pdfs_src_3; // pdfs row (y+2, z+2), index 16
+      _data_buffer[5*((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + 5*((_size_mask_0) / (2))*((ctr_1) / (2)) + 5*((ctr_0) / (2)) + 2] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (16) & 1) * (_data_pdfs_src_11_21_316[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (16) & 1) * (_data_pdfs_src_11_21_316[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (16) & 1) * (_data_pdfs_src_11_22_316[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (16) & 1) * (_data_pdfs_src_11_22_316[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (16) & 1) * (_data_pdfs_src_12_21_316[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (16) & 1) * (_data_pdfs_src_12_21_316[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (16) & 1) * (_data_pdfs_src_12_22_316[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (16) & 1) * (_data_pdfs_src_12_22_316[_stride_pdfs_src_0*ctr_0]);
+      double * RESTRICT _data_pdfs_src_10_21_317 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 17*_stride_pdfs_src_3; // slot 3, index 17: pdfs row (y, z+1); the sum below indexes x+1 and x+2, so pdfs are read at (x+1, y, z+1) relative to the mask cell
+      double * RESTRICT _data_pdfs_src_11_21_317 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 17*_stride_pdfs_src_3; // pdfs row (y+1, z+1), index 17
+      double * RESTRICT _data_pdfs_src_10_22_317 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_2 + 17*_stride_pdfs_src_3; // pdfs row (y, z+2), index 17
+      double * RESTRICT _data_pdfs_src_11_22_317 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_2 + 17*_stride_pdfs_src_3; // pdfs row (y+1, z+2), index 17
+      _data_buffer[5*((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + 5*((_size_mask_0) / (2))*((ctr_1) / (2)) + 5*((ctr_0) / (2)) + 3] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (17) & 1) * (_data_pdfs_src_10_21_317[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (17) & 1) * (_data_pdfs_src_10_21_317[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (17) & 1) * (_data_pdfs_src_10_22_317[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (17) & 1) * (_data_pdfs_src_10_22_317[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (17) & 1) * (_data_pdfs_src_11_21_317[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (17) & 1) * (_data_pdfs_src_11_21_317[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (17) & 1) * (_data_pdfs_src_11_22_317[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (17) & 1) * (_data_pdfs_src_11_22_317[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]);
+      double * RESTRICT _data_pdfs_src_10_21_318 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 18*_stride_pdfs_src_3; // slot 4, index 18: pdfs row (y, z+1); the sum below indexes x-1 and x, so pdfs are read at (x-1, y, z+1) relative to the mask cell. NOTE(review): x-1 is read, presumably from a ghost layer - confirm.
+      double * RESTRICT _data_pdfs_src_11_21_318 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 18*_stride_pdfs_src_3; // pdfs row (y+1, z+1), index 18
+      double * RESTRICT _data_pdfs_src_10_22_318 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_2 + 18*_stride_pdfs_src_3; // pdfs row (y, z+2), index 18
+      double * RESTRICT _data_pdfs_src_11_22_318 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_2 + 18*_stride_pdfs_src_3; // pdfs row (y+1, z+2), index 18
+      _data_buffer[5*((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + 5*((_size_mask_0) / (2))*((ctr_1) / (2)) + 5*((ctr_0) / (2)) + 4] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (18) & 1) * (_data_pdfs_src_10_21_318[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (18) & 1) * (_data_pdfs_src_10_21_318[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (18) & 1) * (_data_pdfs_src_10_22_318[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (18) & 1) * (_data_pdfs_src_10_22_318[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (18) & 1) * (_data_pdfs_src_11_21_318[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (18) & 1) * (_data_pdfs_src_11_21_318[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (18) & 1) * (_data_pdfs_src_11_22_318[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (18) & 1) * (_data_pdfs_src_11_22_318[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]);
+   } 
+}
+} // namespace internal_storagespecification_packPartialCoalescence_B
+
+namespace internal_storagespecification_packPartialCoalescence_BS { // Pack kernel "BS": reduces each 2x2x2 group of cells to one double in _data_buffer, using 4th-axis index 16 of pdfs_src.
+static FUNC_PREFIX void storagespecification_packPartialCoalescence_BS(double * RESTRICT  _data_buffer, uint32_t * RESTRICT const _data_mask, double * RESTRICT const _data_pdfs_src, int64_t const _size_mask_0, int64_t const _size_mask_1, int64_t const _size_mask_2, int64_t const _stride_mask_0, int64_t const _stride_mask_1, int64_t const _stride_mask_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{ // Stores, at buffer index (size0/2)*(size1/2)*(z/2) + (size0/2)*(y/2) + x/2 (integer division), the sum over the 8 cells of ((mask >> 16) & 1) * pdf; the terms are added left to right. Reads _data_mask and _data_pdfs_src, writes only _data_buffer.
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_mask_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_mask_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_mask_2)
+   { // Only threads whose global x, y and z indices are all even and below _size_mask_0/1/2 do work. NOTE(review): the body also reads index+1 of the mask in every dimension, which looks like it assumes even _size_mask_* - confirm with the caller.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index along x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index along y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index along z
+      uint32_t * RESTRICT _data_mask_10_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2; // mask row (y, z)
+      double * RESTRICT _data_pdfs_src_11_21_316 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 16*_stride_pdfs_src_3; // pdfs row (y+1, z+1), index 16; paired with mask row (y, z). NOTE(review): y+2 and z+2 are read, presumably from ghost layers - confirm.
+      uint32_t * RESTRICT _data_mask_11_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2; // mask row (y+1, z)
+      double * RESTRICT _data_pdfs_src_12_21_316 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + 2*_stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 16*_stride_pdfs_src_3; // pdfs row (y+2, z+1), index 16; paired with mask row (y+1, z)
+      uint32_t * RESTRICT _data_mask_10_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2 + _stride_mask_2; // mask row (y, z+1)
+      double * RESTRICT _data_pdfs_src_11_22_316 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_2 + 16*_stride_pdfs_src_3; // pdfs row (y+1, z+2), index 16; paired with mask row (y, z+1)
+      uint32_t * RESTRICT _data_mask_11_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2 + _stride_mask_2; // mask row (y+1, z+1)
+      double * RESTRICT _data_pdfs_src_12_22_316 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + 2*_stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_2 + 16*_stride_pdfs_src_3; // pdfs row (y+2, z+2), index 16; paired with mask row (y+1, z+1)
+      _data_buffer[((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + ((_size_mask_0) / (2))*((ctr_1) / (2)) + ((ctr_0) / (2))] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (16) & 1) * (_data_pdfs_src_11_21_316[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (16) & 1) * (_data_pdfs_src_11_21_316[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (16) & 1) * (_data_pdfs_src_11_22_316[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (16) & 1) * (_data_pdfs_src_11_22_316[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (16) & 1) * (_data_pdfs_src_12_21_316[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (16) & 1) * (_data_pdfs_src_12_21_316[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (16) & 1) * (_data_pdfs_src_12_22_316[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (16) & 1) * (_data_pdfs_src_12_22_316[_stride_pdfs_src_0*ctr_0]);
+   } 
+}
+} // namespace internal_storagespecification_packPartialCoalescence_BS
+
+namespace internal_storagespecification_packPartialCoalescence_W {
+static FUNC_PREFIX void storagespecification_packPartialCoalescence_W(double * RESTRICT  _data_buffer, uint32_t * RESTRICT const _data_mask, double * RESTRICT const _data_pdfs_src, int64_t const _size_mask_0, int64_t const _size_mask_1, int64_t const _size_mask_2, int64_t const _stride_mask_0, int64_t const _stride_mask_1, int64_t const _stride_mask_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_mask_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_mask_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_mask_2)
+   {
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
+      uint32_t * RESTRICT _data_mask_10_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2;
+      double * RESTRICT _data_pdfs_src_10_20_33 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 3*_stride_pdfs_src_3;
+      uint32_t * RESTRICT _data_mask_11_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2;
+      double * RESTRICT _data_pdfs_src_11_20_33 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 3*_stride_pdfs_src_3;
+      uint32_t * RESTRICT _data_mask_10_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2 + _stride_mask_2;
+      double * RESTRICT _data_pdfs_src_10_21_33 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 3*_stride_pdfs_src_3;
+      uint32_t * RESTRICT _data_mask_11_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2 + _stride_mask_2;
+      double * RESTRICT _data_pdfs_src_11_21_33 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 3*_stride_pdfs_src_3;
+      _data_buffer[5*((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + 5*((_size_mask_0) / (2))*((ctr_1) / (2)) + 5*((ctr_0) / (2))] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (3) & 1) * (_data_pdfs_src_10_20_33[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (3) & 1) * (_data_pdfs_src_10_20_33[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (3) & 1) * (_data_pdfs_src_10_21_33[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (3) & 1) * (_data_pdfs_src_10_21_33[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (3) & 1) * (_data_pdfs_src_11_20_33[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (3) & 1) * (_data_pdfs_src_11_20_33[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (3) & 1) * (_data_pdfs_src_11_21_33[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (3) & 1) * (_data_pdfs_src_11_21_33[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]);
+      double * RESTRICT _data_pdfs_src_1m1_20_37 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 - _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_10_20_37 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_1m1_21_37 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 - _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 7*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_10_21_37 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 7*_stride_pdfs_src_3;
+      _data_buffer[5*((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + 5*((_size_mask_0) / (2))*((ctr_1) / (2)) + 5*((ctr_0) / (2)) + 1] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (7) & 1) * (_data_pdfs_src_1m1_20_37[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (7) & 1) * (_data_pdfs_src_1m1_20_37[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (7) & 1) * (_data_pdfs_src_1m1_21_37[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (7) & 1) * (_data_pdfs_src_1m1_21_37[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (7) & 1) * (_data_pdfs_src_10_20_37[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (7) & 1) * (_data_pdfs_src_10_20_37[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (7) & 1) * (_data_pdfs_src_10_21_37[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (7) & 1) * (_data_pdfs_src_10_21_37[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]);
+      double * RESTRICT _data_pdfs_src_11_20_39 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_12_20_39 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + 2*_stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_11_21_39 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 9*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_12_21_39 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + 2*_stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 9*_stride_pdfs_src_3;
+      _data_buffer[5*((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + 5*((_size_mask_0) / (2))*((ctr_1) / (2)) + 5*((ctr_0) / (2)) + 2] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (9) & 1) * (_data_pdfs_src_11_20_39[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (9) & 1) * (_data_pdfs_src_11_20_39[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (9) & 1) * (_data_pdfs_src_11_21_39[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (9) & 1) * (_data_pdfs_src_11_21_39[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (9) & 1) * (_data_pdfs_src_12_20_39[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (9) & 1) * (_data_pdfs_src_12_20_39[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (9) & 1) * (_data_pdfs_src_12_21_39[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (9) & 1) * (_data_pdfs_src_12_21_39[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]);
+      double * RESTRICT _data_pdfs_src_10_2m1_313 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 - _stride_pdfs_src_2 + 13*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_11_2m1_313 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 - _stride_pdfs_src_2 + 13*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_10_20_313 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_11_20_313 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3;
+      _data_buffer[5*((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + 5*((_size_mask_0) / (2))*((ctr_1) / (2)) + 5*((ctr_0) / (2)) + 3] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (13) & 1) * (_data_pdfs_src_10_2m1_313[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (13) & 1) * (_data_pdfs_src_10_2m1_313[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (13) & 1) * (_data_pdfs_src_10_20_313[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (13) & 1) * (_data_pdfs_src_10_20_313[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (13) & 1) * (_data_pdfs_src_11_2m1_313[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (13) & 1) * (_data_pdfs_src_11_2m1_313[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (13) & 1) * (_data_pdfs_src_11_20_313[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (13) & 1) * (_data_pdfs_src_11_20_313[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]);
+      double * RESTRICT _data_pdfs_src_10_21_317 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 17*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_11_21_317 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 17*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_10_22_317 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_2 + 17*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_11_22_317 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_2 + 17*_stride_pdfs_src_3;
+      _data_buffer[5*((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + 5*((_size_mask_0) / (2))*((ctr_1) / (2)) + 5*((ctr_0) / (2)) + 4] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (17) & 1) * (_data_pdfs_src_10_21_317[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (17) & 1) * (_data_pdfs_src_10_21_317[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (17) & 1) * (_data_pdfs_src_10_22_317[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (17) & 1) * (_data_pdfs_src_10_22_317[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (17) & 1) * (_data_pdfs_src_11_21_317[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (17) & 1) * (_data_pdfs_src_11_21_317[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (17) & 1) * (_data_pdfs_src_11_22_317[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (17) & 1) * (_data_pdfs_src_11_22_317[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]);
+   } 
+}
+}
+
+namespace internal_storagespecification_packPartialCoalescence_E { // Holds the kernel that reduces PDF slots 4, 8, 10, 14 and 18 over 2x2x2 cell groups into _data_buffer.
+static FUNC_PREFIX void storagespecification_packPartialCoalescence_E(double * RESTRICT  _data_buffer, uint32_t * RESTRICT const _data_mask, double * RESTRICT const _data_pdfs_src, int64_t const _size_mask_0, int64_t const _size_mask_1, int64_t const _size_mask_2, int64_t const _stride_mask_0, int64_t const _stride_mask_1, int64_t const _stride_mask_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{ // FUNC_PREFIX and RESTRICT are macros defined outside this block; indexing relies on the CUDA built-ins blockDim/blockIdx/threadIdx.
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_mask_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_mask_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_mask_2) // only threads whose x, y and z indices are all even and inside the mask extent do any work
+   { // NOTE(review): the body reads the mask at +1 in every dimension and PDFs at x-1, y-1..y+2, z-1..z+2; presumably the fields carry ghost layers - confirm against the caller.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index along x (even here)
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index along y (even here)
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index along z (even here)
+      uint32_t * RESTRICT _data_mask_10_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2; // mask row at (y, z)
+      double * RESTRICT _data_pdfs_src_10_20_34 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 4*_stride_pdfs_src_3;
+      uint32_t * RESTRICT _data_mask_11_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2; // mask row at (y+1, z)
+      double * RESTRICT _data_pdfs_src_11_20_34 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 4*_stride_pdfs_src_3;
+      uint32_t * RESTRICT _data_mask_10_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2 + _stride_mask_2; // mask row at (y, z+1)
+      double * RESTRICT _data_pdfs_src_10_21_34 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 4*_stride_pdfs_src_3;
+      uint32_t * RESTRICT _data_mask_11_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2 + _stride_mask_2; // mask row at (y+1, z+1)
+      double * RESTRICT _data_pdfs_src_11_21_34 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 4*_stride_pdfs_src_3;
+      _data_buffer[5*((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + 5*((_size_mask_0) / (2))*((ctr_1) / (2)) + 5*((ctr_0) / (2))] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (4) & 1) * (_data_pdfs_src_10_20_34[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (4) & 1) * (_data_pdfs_src_10_20_34[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (4) & 1) * (_data_pdfs_src_10_21_34[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (4) & 1) * (_data_pdfs_src_10_21_34[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (4) & 1) * (_data_pdfs_src_11_20_34[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (4) & 1) * (_data_pdfs_src_11_20_34[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (4) & 1) * (_data_pdfs_src_11_21_34[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (4) & 1) * (_data_pdfs_src_11_21_34[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]); // entry 0 of this group's 5-value record: sum over the 8 mask cells of (mask bit 4) * (slot-4 PDF at cell offset (-1, 0, 0) from that mask cell)
+      double * RESTRICT _data_pdfs_src_1m1_20_38 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 - _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_10_20_38 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_1m1_21_38 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 - _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 8*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_10_21_38 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 8*_stride_pdfs_src_3;
+      _data_buffer[5*((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + 5*((_size_mask_0) / (2))*((ctr_1) / (2)) + 5*((ctr_0) / (2)) + 1] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (8) & 1) * (_data_pdfs_src_1m1_20_38[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (8) & 1) * (_data_pdfs_src_1m1_20_38[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (8) & 1) * (_data_pdfs_src_1m1_21_38[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (8) & 1) * (_data_pdfs_src_1m1_21_38[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (8) & 1) * (_data_pdfs_src_10_20_38[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (8) & 1) * (_data_pdfs_src_10_20_38[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (8) & 1) * (_data_pdfs_src_10_21_38[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (8) & 1) * (_data_pdfs_src_10_21_38[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]); // entry 1: mask bit 8 gates the slot-8 PDF at cell offset (-1, -1, 0) from each mask cell
+      double * RESTRICT _data_pdfs_src_11_20_310 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_12_20_310 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + 2*_stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_11_21_310 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 10*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_12_21_310 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + 2*_stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 10*_stride_pdfs_src_3;
+      _data_buffer[5*((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + 5*((_size_mask_0) / (2))*((ctr_1) / (2)) + 5*((ctr_0) / (2)) + 2] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (10) & 1) * (_data_pdfs_src_11_20_310[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (10) & 1) * (_data_pdfs_src_11_20_310[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (10) & 1) * (_data_pdfs_src_11_21_310[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (10) & 1) * (_data_pdfs_src_11_21_310[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (10) & 1) * (_data_pdfs_src_12_20_310[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (10) & 1) * (_data_pdfs_src_12_20_310[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (10) & 1) * (_data_pdfs_src_12_21_310[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (10) & 1) * (_data_pdfs_src_12_21_310[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]); // entry 2: mask bit 10 gates the slot-10 PDF at cell offset (-1, +1, 0) from each mask cell
+      double * RESTRICT _data_pdfs_src_10_2m1_314 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 - _stride_pdfs_src_2 + 14*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_11_2m1_314 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 - _stride_pdfs_src_2 + 14*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_10_20_314 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_11_20_314 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3;
+      _data_buffer[5*((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + 5*((_size_mask_0) / (2))*((ctr_1) / (2)) + 5*((ctr_0) / (2)) + 3] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (14) & 1) * (_data_pdfs_src_10_2m1_314[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (14) & 1) * (_data_pdfs_src_10_2m1_314[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (14) & 1) * (_data_pdfs_src_10_20_314[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (14) & 1) * (_data_pdfs_src_10_20_314[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (14) & 1) * (_data_pdfs_src_11_2m1_314[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (14) & 1) * (_data_pdfs_src_11_2m1_314[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (14) & 1) * (_data_pdfs_src_11_20_314[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (14) & 1) * (_data_pdfs_src_11_20_314[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]); // entry 3: mask bit 14 gates the slot-14 PDF at cell offset (-1, 0, -1) from each mask cell
+      double * RESTRICT _data_pdfs_src_10_21_318 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 18*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_11_21_318 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 18*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_10_22_318 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_2 + 18*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_11_22_318 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_2 + 18*_stride_pdfs_src_3;
+      _data_buffer[5*((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + 5*((_size_mask_0) / (2))*((ctr_1) / (2)) + 5*((ctr_0) / (2)) + 4] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (18) & 1) * (_data_pdfs_src_10_21_318[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (18) & 1) * (_data_pdfs_src_10_21_318[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (18) & 1) * (_data_pdfs_src_10_22_318[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (18) & 1) * (_data_pdfs_src_10_22_318[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (18) & 1) * (_data_pdfs_src_11_21_318[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (18) & 1) * (_data_pdfs_src_11_21_318[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (18) & 1) * (_data_pdfs_src_11_22_318[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (18) & 1) * (_data_pdfs_src_11_22_318[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]); // entry 4: mask bit 18 gates the slot-18 PDF at cell offset (-1, 0, +1) from each mask cell
+   } 
+}
+}
+
+namespace internal_storagespecification_packPartialCoalescence_T { // Holds the kernel that reduces PDF slots 5, 11, 12, 13 and 14 over 2x2x2 cell groups into _data_buffer.
+static FUNC_PREFIX void storagespecification_packPartialCoalescence_T(double * RESTRICT  _data_buffer, uint32_t * RESTRICT const _data_mask, double * RESTRICT const _data_pdfs_src, int64_t const _size_mask_0, int64_t const _size_mask_1, int64_t const _size_mask_2, int64_t const _stride_mask_0, int64_t const _stride_mask_1, int64_t const _stride_mask_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{ // FUNC_PREFIX and RESTRICT are macros defined outside this block; indexing relies on the CUDA built-ins blockDim/blockIdx/threadIdx.
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_mask_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_mask_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_mask_2) // only threads whose x, y and z indices are all even and inside the mask extent do any work
+   { // NOTE(review): the body reads the mask at +1 in every dimension and PDFs at x-1..x+2, y-1..y+2, z-1..z+1; presumably the fields carry ghost layers - confirm against the caller.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index along x (even here)
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index along y (even here)
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index along z (even here)
+      uint32_t * RESTRICT _data_mask_10_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2; // mask row at (y, z)
+      double * RESTRICT _data_pdfs_src_10_2m1_35 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 - _stride_pdfs_src_2 + 5*_stride_pdfs_src_3;
+      uint32_t * RESTRICT _data_mask_11_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2; // mask row at (y+1, z)
+      double * RESTRICT _data_pdfs_src_11_2m1_35 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 - _stride_pdfs_src_2 + 5*_stride_pdfs_src_3;
+      uint32_t * RESTRICT _data_mask_10_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2 + _stride_mask_2; // mask row at (y, z+1)
+      double * RESTRICT _data_pdfs_src_10_20_35 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 5*_stride_pdfs_src_3;
+      uint32_t * RESTRICT _data_mask_11_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2 + _stride_mask_2; // mask row at (y+1, z+1)
+      double * RESTRICT _data_pdfs_src_11_20_35 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 5*_stride_pdfs_src_3;
+      _data_buffer[5*((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + 5*((_size_mask_0) / (2))*((ctr_1) / (2)) + 5*((ctr_0) / (2))] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (5) & 1) * (_data_pdfs_src_10_2m1_35[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (5) & 1) * (_data_pdfs_src_10_2m1_35[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (5) & 1) * (_data_pdfs_src_10_20_35[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (5) & 1) * (_data_pdfs_src_10_20_35[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (5) & 1) * (_data_pdfs_src_11_2m1_35[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (5) & 1) * (_data_pdfs_src_11_2m1_35[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (5) & 1) * (_data_pdfs_src_11_20_35[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (5) & 1) * (_data_pdfs_src_11_20_35[_stride_pdfs_src_0*ctr_0]); // entry 0 of this group's 5-value record: sum over the 8 mask cells of (mask bit 5) * (slot-5 PDF at cell offset (0, 0, -1) from that mask cell)
+      double * RESTRICT _data_pdfs_src_1m1_2m1_311 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 - _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 - _stride_pdfs_src_2 + 11*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_10_2m1_311 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 - _stride_pdfs_src_2 + 11*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_1m1_20_311 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 - _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_10_20_311 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3;
+      _data_buffer[5*((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + 5*((_size_mask_0) / (2))*((ctr_1) / (2)) + 5*((ctr_0) / (2)) + 1] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (11) & 1) * (_data_pdfs_src_1m1_2m1_311[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (11) & 1) * (_data_pdfs_src_1m1_2m1_311[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (11) & 1) * (_data_pdfs_src_1m1_20_311[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (11) & 1) * (_data_pdfs_src_1m1_20_311[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (11) & 1) * (_data_pdfs_src_10_2m1_311[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (11) & 1) * (_data_pdfs_src_10_2m1_311[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (11) & 1) * (_data_pdfs_src_10_20_311[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (11) & 1) * (_data_pdfs_src_10_20_311[_stride_pdfs_src_0*ctr_0]); // entry 1: mask bit 11 gates the slot-11 PDF at cell offset (0, -1, -1) from each mask cell
+      double * RESTRICT _data_pdfs_src_11_2m1_312 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 - _stride_pdfs_src_2 + 12*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_12_2m1_312 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + 2*_stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 - _stride_pdfs_src_2 + 12*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_11_20_312 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_12_20_312 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + 2*_stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3;
+      _data_buffer[5*((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + 5*((_size_mask_0) / (2))*((ctr_1) / (2)) + 5*((ctr_0) / (2)) + 2] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (12) & 1) * (_data_pdfs_src_11_2m1_312[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (12) & 1) * (_data_pdfs_src_11_2m1_312[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (12) & 1) * (_data_pdfs_src_11_20_312[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (12) & 1) * (_data_pdfs_src_11_20_312[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (12) & 1) * (_data_pdfs_src_12_2m1_312[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (12) & 1) * (_data_pdfs_src_12_2m1_312[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (12) & 1) * (_data_pdfs_src_12_20_312[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (12) & 1) * (_data_pdfs_src_12_20_312[_stride_pdfs_src_0*ctr_0]); // entry 2: mask bit 12 gates the slot-12 PDF at cell offset (0, +1, -1) from each mask cell
+      double * RESTRICT _data_pdfs_src_10_2m1_313 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 - _stride_pdfs_src_2 + 13*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_11_2m1_313 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 - _stride_pdfs_src_2 + 13*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_10_20_313 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_11_20_313 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3;
+      _data_buffer[5*((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + 5*((_size_mask_0) / (2))*((ctr_1) / (2)) + 5*((ctr_0) / (2)) + 3] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (13) & 1) * (_data_pdfs_src_10_2m1_313[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (13) & 1) * (_data_pdfs_src_10_2m1_313[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (13) & 1) * (_data_pdfs_src_10_20_313[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (13) & 1) * (_data_pdfs_src_10_20_313[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (13) & 1) * (_data_pdfs_src_11_2m1_313[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (13) & 1) * (_data_pdfs_src_11_2m1_313[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (13) & 1) * (_data_pdfs_src_11_20_313[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (13) & 1) * (_data_pdfs_src_11_20_313[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]); // entry 3: mask bit 13 gates the slot-13 PDF at cell offset (+1, 0, -1) from each mask cell
+      double * RESTRICT _data_pdfs_src_10_2m1_314 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 - _stride_pdfs_src_2 + 14*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_11_2m1_314 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 - _stride_pdfs_src_2 + 14*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_10_20_314 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_11_20_314 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3;
+      _data_buffer[5*((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + 5*((_size_mask_0) / (2))*((ctr_1) / (2)) + 5*((ctr_0) / (2)) + 4] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (14) & 1) * (_data_pdfs_src_10_2m1_314[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (14) & 1) * (_data_pdfs_src_10_2m1_314[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (14) & 1) * (_data_pdfs_src_10_20_314[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (14) & 1) * (_data_pdfs_src_10_20_314[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (14) & 1) * (_data_pdfs_src_11_2m1_314[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (14) & 1) * (_data_pdfs_src_11_2m1_314[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (14) & 1) * (_data_pdfs_src_11_20_314[_stride_pdfs_src_0*ctr_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (14) & 1) * (_data_pdfs_src_11_20_314[_stride_pdfs_src_0*ctr_0 - _stride_pdfs_src_0]); // entry 4: mask bit 14 gates the slot-14 PDF at cell offset (-1, 0, -1) from each mask cell
+   } 
+}
+}
+
+namespace internal_storagespecification_packPartialCoalescence_NW { // Holds the kernel that reduces PDF slot 7 over 2x2x2 cell groups into _data_buffer (one value per group).
+static FUNC_PREFIX void storagespecification_packPartialCoalescence_NW(double * RESTRICT  _data_buffer, uint32_t * RESTRICT const _data_mask, double * RESTRICT const _data_pdfs_src, int64_t const _size_mask_0, int64_t const _size_mask_1, int64_t const _size_mask_2, int64_t const _stride_mask_0, int64_t const _stride_mask_1, int64_t const _stride_mask_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{ // FUNC_PREFIX and RESTRICT are macros defined outside this block; indexing relies on the CUDA built-ins blockDim/blockIdx/threadIdx.
+   if (((((blockDim.x*blockIdx.x + threadIdx.x) % (2))) == (0)) && ((((blockDim.y*blockIdx.y + threadIdx.y) % (2))) == (0)) && ((((blockDim.z*blockIdx.z + threadIdx.z) % (2))) == (0)) && blockDim.x*blockIdx.x + threadIdx.x < _size_mask_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_mask_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_mask_2) // only threads whose x, y and z indices are all even and inside the mask extent do any work
+   { // NOTE(review): the body reads the mask at +1 in every dimension and PDFs at x+1..x+2, y-1..y, z..z+1; presumably the fields carry ghost layers - confirm against the caller.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index along x (even here)
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index along y (even here)
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index along z (even here)
+      uint32_t * RESTRICT _data_mask_10_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2; // mask row at (y, z)
+      double * RESTRICT _data_pdfs_src_1m1_20_37 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 - _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3; // slot-7 PDF row at (y-1, z)
+      uint32_t * RESTRICT _data_mask_11_20 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2; // mask row at (y+1, z)
+      double * RESTRICT _data_pdfs_src_10_20_37 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3; // slot-7 PDF row at (y, z)
+      uint32_t * RESTRICT _data_mask_10_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_2*ctr_2 + _stride_mask_2; // mask row at (y, z+1)
+      double * RESTRICT _data_pdfs_src_1m1_21_37 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 - _stride_pdfs_src_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 7*_stride_pdfs_src_3; // slot-7 PDF row at (y-1, z+1)
+      uint32_t * RESTRICT _data_mask_11_21 = _data_mask + _stride_mask_1*ctr_1 + _stride_mask_1 + _stride_mask_2*ctr_2 + _stride_mask_2; // mask row at (y+1, z+1)
+      double * RESTRICT _data_pdfs_src_10_21_37 = _data_pdfs_src + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_2 + 7*_stride_pdfs_src_3; // slot-7 PDF row at (y, z+1)
+      _data_buffer[((_size_mask_0) / (2))*((_size_mask_1) / (2))*((ctr_2) / (2)) + ((_size_mask_0) / (2))*((ctr_1) / (2)) + ((ctr_0) / (2))] = ((_data_mask_10_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (7) & 1) * (_data_pdfs_src_1m1_20_37[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_10_20[_stride_mask_0*ctr_0]) >> (7) & 1) * (_data_pdfs_src_1m1_20_37[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (7) & 1) * (_data_pdfs_src_1m1_21_37[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_10_21[_stride_mask_0*ctr_0]) >> (7) & 1) * (_data_pdfs_src_1m1_21_37[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (7) & 1) * (_data_pdfs_src_10_20_37[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_11_20[_stride_mask_0*ctr_0]) >> (7) & 1) * (_data_pdfs_src_10_20_37[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0 + _stride_mask_0]) >> (7) & 1) * (_data_pdfs_src_10_21_37[_stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_0]) + ((_data_mask_11_21[_stride_mask_0*ctr_0]) >> (7) & 1) * (_data_pdfs_src_10_21_37[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_0]); // the group's single buffer value: sum over the 8 mask cells of (mask bit 7) * (slot-7 PDF at cell offset (+1, -1, 0) from that mask cell)
+   } 
+}
+}
+
+namespace internal_storagespecification_zeroCoalescenceRegion_E { // Holds the kernel that writes 0.0 into PDF slots 3, 7, 9, 13 and 17 of _data_pdfs_dst at cells shifted by +1 in x.
+static FUNC_PREFIX void storagespecification_zeroCoalescenceRegion_E(double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{ // FUNC_PREFIX and RESTRICT are macros defined outside this block; indexing relies on the CUDA built-ins blockDim/blockIdx/threadIdx.
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_dst_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_dst_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_dst_2) // threads outside the _size_pdfs_dst_* extent do nothing
+   { // NOTE(review): stores land one cell outside the thread's own (x, y, z) in x, y or z; presumably the field has ghost layers - confirm against the caller.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index along x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index along y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index along z
+      double * RESTRICT  _data_pdfs_dst_10_20_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_20_33[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = 0.0; // slot 3 at (x+1, y, z)
+      double * RESTRICT  _data_pdfs_dst_1m1_20_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 - _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3;
+      _data_pdfs_dst_1m1_20_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = 0.0; // slot 7 at (x+1, y-1, z)
+      double * RESTRICT  _data_pdfs_dst_11_20_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3;
+      _data_pdfs_dst_11_20_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = 0.0; // slot 9 at (x+1, y+1, z)
+      double * RESTRICT  _data_pdfs_dst_10_2m1_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 - _stride_pdfs_dst_2 + 13*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_2m1_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = 0.0; // slot 13 at (x+1, y, z-1)
+      double * RESTRICT  _data_pdfs_dst_10_21_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 17*_stride_pdfs_dst_3;
+      _data_pdfs_dst_10_21_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = 0.0; // slot 17 at (x+1, y, z+1)
+   } 
+}
+}
+
+namespace internal_storagespecification_zeroCoalescenceRegion_BW {
+// Writes 0.0 to one pdfs_dst entry per in-range cell (x, y, z); each thread handles one cell.
+static FUNC_PREFIX void storagespecification_zeroCoalescenceRegion_BW(double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   double * const cell = _data_pdfs_dst + _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   // index 14 on the stride_3 axis, cell offset (-1, 0, -1)
+   cell[14*_stride_pdfs_dst_3 - _stride_pdfs_dst_0 - _stride_pdfs_dst_2] = 0.0;
+}
+}
+
+namespace internal_storagespecification_zeroCoalescenceRegion_T {
+// Writes 0.0 to five pdfs_dst entries around every in-range cell (x, y, z); each thread handles one cell.
+static FUNC_PREFIX void storagespecification_zeroCoalescenceRegion_T(double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   double * const cell = _data_pdfs_dst + _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   // index 6 on the stride_3 axis, cell offset (0, 0, +1)
+   cell[6*_stride_pdfs_dst_3 + _stride_pdfs_dst_2] = 0.0;
+   // index 15 on the stride_3 axis, cell offset (0, -1, +1)
+   cell[15*_stride_pdfs_dst_3 - _stride_pdfs_dst_1 + _stride_pdfs_dst_2] = 0.0;
+   // index 16 on the stride_3 axis, cell offset (0, +1, +1)
+   cell[16*_stride_pdfs_dst_3 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2] = 0.0;
+   // index 17 on the stride_3 axis, cell offset (+1, 0, +1)
+   cell[17*_stride_pdfs_dst_3 + _stride_pdfs_dst_0 + _stride_pdfs_dst_2] = 0.0;
+   // index 18 on the stride_3 axis, cell offset (-1, 0, +1)
+   cell[18*_stride_pdfs_dst_3 - _stride_pdfs_dst_0 + _stride_pdfs_dst_2] = 0.0;
+}
+}
+
+namespace internal_storagespecification_zeroCoalescenceRegion_TW {
+// Writes 0.0 to one pdfs_dst entry per in-range cell (x, y, z); each thread handles one cell.
+static FUNC_PREFIX void storagespecification_zeroCoalescenceRegion_TW(double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   double * const cell = _data_pdfs_dst + _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   // index 18 on the stride_3 axis, cell offset (-1, 0, +1)
+   cell[18*_stride_pdfs_dst_3 - _stride_pdfs_dst_0 + _stride_pdfs_dst_2] = 0.0;
+}
+}
+
+namespace internal_storagespecification_zeroCoalescenceRegion_NE {
+// Writes 0.0 to one pdfs_dst entry per in-range cell (x, y, z); each thread handles one cell.
+static FUNC_PREFIX void storagespecification_zeroCoalescenceRegion_NE(double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   double * const cell = _data_pdfs_dst + _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   // index 9 on the stride_3 axis, cell offset (+1, +1, 0)
+   cell[9*_stride_pdfs_dst_3 + _stride_pdfs_dst_0 + _stride_pdfs_dst_1] = 0.0;
+}
+}
+
+namespace internal_storagespecification_zeroCoalescenceRegion_N {
+// Writes 0.0 to five pdfs_dst entries around every in-range cell (x, y, z); each thread handles one cell.
+static FUNC_PREFIX void storagespecification_zeroCoalescenceRegion_N(double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   double * const cell = _data_pdfs_dst + _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   // index 2 on the stride_3 axis, cell offset (0, +1, 0)
+   cell[2*_stride_pdfs_dst_3 + _stride_pdfs_dst_1] = 0.0;
+   // index 9 on the stride_3 axis, cell offset (+1, +1, 0)
+   cell[9*_stride_pdfs_dst_3 + _stride_pdfs_dst_0 + _stride_pdfs_dst_1] = 0.0;
+   // index 10 on the stride_3 axis, cell offset (-1, +1, 0)
+   cell[10*_stride_pdfs_dst_3 - _stride_pdfs_dst_0 + _stride_pdfs_dst_1] = 0.0;
+   // index 12 on the stride_3 axis, cell offset (0, +1, -1)
+   cell[12*_stride_pdfs_dst_3 + _stride_pdfs_dst_1 - _stride_pdfs_dst_2] = 0.0;
+   // index 16 on the stride_3 axis, cell offset (0, +1, +1)
+   cell[16*_stride_pdfs_dst_3 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2] = 0.0;
+}
+}
+
+namespace internal_storagespecification_zeroCoalescenceRegion_TE {
+// Writes 0.0 to one pdfs_dst entry per in-range cell (x, y, z); each thread handles one cell.
+static FUNC_PREFIX void storagespecification_zeroCoalescenceRegion_TE(double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   double * const cell = _data_pdfs_dst + _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   // index 17 on the stride_3 axis, cell offset (+1, 0, +1)
+   cell[17*_stride_pdfs_dst_3 + _stride_pdfs_dst_0 + _stride_pdfs_dst_2] = 0.0;
+}
+}
+
+namespace internal_storagespecification_zeroCoalescenceRegion_SW {
+// Writes 0.0 to one pdfs_dst entry per in-range cell (x, y, z); each thread handles one cell.
+static FUNC_PREFIX void storagespecification_zeroCoalescenceRegion_SW(double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   double * const cell = _data_pdfs_dst + _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   // index 8 on the stride_3 axis, cell offset (-1, -1, 0)
+   cell[8*_stride_pdfs_dst_3 - _stride_pdfs_dst_0 - _stride_pdfs_dst_1] = 0.0;
+}
+}
+
+namespace internal_storagespecification_zeroCoalescenceRegion_NW {
+// Writes 0.0 to one pdfs_dst entry per in-range cell (x, y, z); each thread handles one cell.
+static FUNC_PREFIX void storagespecification_zeroCoalescenceRegion_NW(double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   double * const cell = _data_pdfs_dst + _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   // index 10 on the stride_3 axis, cell offset (-1, +1, 0)
+   cell[10*_stride_pdfs_dst_3 - _stride_pdfs_dst_0 + _stride_pdfs_dst_1] = 0.0;
+}
+}
+
+namespace internal_storagespecification_zeroCoalescenceRegion_BN {
+// Writes 0.0 to one pdfs_dst entry per in-range cell (x, y, z); each thread handles one cell.
+static FUNC_PREFIX void storagespecification_zeroCoalescenceRegion_BN(double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   double * const cell = _data_pdfs_dst + _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   // index 12 on the stride_3 axis, cell offset (0, +1, -1)
+   cell[12*_stride_pdfs_dst_3 + _stride_pdfs_dst_1 - _stride_pdfs_dst_2] = 0.0;
+}
+}
+
+namespace internal_storagespecification_zeroCoalescenceRegion_B {
+// Writes 0.0 to five pdfs_dst entries around every in-range cell (x, y, z); each thread handles one cell.
+static FUNC_PREFIX void storagespecification_zeroCoalescenceRegion_B(double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   double * const cell = _data_pdfs_dst + _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   // index 5 on the stride_3 axis, cell offset (0, 0, -1)
+   cell[5*_stride_pdfs_dst_3 - _stride_pdfs_dst_2] = 0.0;
+   // index 11 on the stride_3 axis, cell offset (0, -1, -1)
+   cell[11*_stride_pdfs_dst_3 - _stride_pdfs_dst_1 - _stride_pdfs_dst_2] = 0.0;
+   // index 12 on the stride_3 axis, cell offset (0, +1, -1)
+   cell[12*_stride_pdfs_dst_3 + _stride_pdfs_dst_1 - _stride_pdfs_dst_2] = 0.0;
+   // index 13 on the stride_3 axis, cell offset (+1, 0, -1)
+   cell[13*_stride_pdfs_dst_3 + _stride_pdfs_dst_0 - _stride_pdfs_dst_2] = 0.0;
+   // index 14 on the stride_3 axis, cell offset (-1, 0, -1)
+   cell[14*_stride_pdfs_dst_3 - _stride_pdfs_dst_0 - _stride_pdfs_dst_2] = 0.0;
+}
+}
+
+namespace internal_storagespecification_zeroCoalescenceRegion_SE {
+// Writes 0.0 to one pdfs_dst entry per in-range cell (x, y, z); each thread handles one cell.
+static FUNC_PREFIX void storagespecification_zeroCoalescenceRegion_SE(double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   double * const cell = _data_pdfs_dst + _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   // index 7 on the stride_3 axis, cell offset (+1, -1, 0)
+   cell[7*_stride_pdfs_dst_3 + _stride_pdfs_dst_0 - _stride_pdfs_dst_1] = 0.0;
+}
+}
+
+namespace internal_storagespecification_zeroCoalescenceRegion_TS {
+// Writes 0.0 to one pdfs_dst entry per in-range cell (x, y, z); each thread handles one cell.
+static FUNC_PREFIX void storagespecification_zeroCoalescenceRegion_TS(double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   double * const cell = _data_pdfs_dst + _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   // index 15 on the stride_3 axis, cell offset (0, -1, +1)
+   cell[15*_stride_pdfs_dst_3 - _stride_pdfs_dst_1 + _stride_pdfs_dst_2] = 0.0;
+}
+}
+
+namespace internal_storagespecification_zeroCoalescenceRegion_W {
+// Writes 0.0 to five pdfs_dst entries around every in-range cell (x, y, z); each thread handles one cell.
+static FUNC_PREFIX void storagespecification_zeroCoalescenceRegion_W(double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   double * const cell = _data_pdfs_dst + _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   // index 4 on the stride_3 axis, cell offset (-1, 0, 0)
+   cell[4*_stride_pdfs_dst_3 - _stride_pdfs_dst_0] = 0.0;
+   // index 8 on the stride_3 axis, cell offset (-1, -1, 0)
+   cell[8*_stride_pdfs_dst_3 - _stride_pdfs_dst_0 - _stride_pdfs_dst_1] = 0.0;
+   // index 10 on the stride_3 axis, cell offset (-1, +1, 0)
+   cell[10*_stride_pdfs_dst_3 - _stride_pdfs_dst_0 + _stride_pdfs_dst_1] = 0.0;
+   // index 14 on the stride_3 axis, cell offset (-1, 0, -1)
+   cell[14*_stride_pdfs_dst_3 - _stride_pdfs_dst_0 - _stride_pdfs_dst_2] = 0.0;
+   // index 18 on the stride_3 axis, cell offset (-1, 0, +1)
+   cell[18*_stride_pdfs_dst_3 - _stride_pdfs_dst_0 + _stride_pdfs_dst_2] = 0.0;
+}
+}
+
+namespace internal_storagespecification_zeroCoalescenceRegion_BE {
+// Writes 0.0 to one pdfs_dst entry per in-range cell (x, y, z); each thread handles one cell.
+static FUNC_PREFIX void storagespecification_zeroCoalescenceRegion_BE(double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   double * const cell = _data_pdfs_dst + _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   // index 13 on the stride_3 axis, cell offset (+1, 0, -1)
+   cell[13*_stride_pdfs_dst_3 + _stride_pdfs_dst_0 - _stride_pdfs_dst_2] = 0.0;
+}
+}
+
+namespace internal_storagespecification_zeroCoalescenceRegion_S {
+// Writes 0.0 to five pdfs_dst entries around every in-range cell (x, y, z); each thread handles one cell.
+static FUNC_PREFIX void storagespecification_zeroCoalescenceRegion_S(double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   double * const cell = _data_pdfs_dst + _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   // index 1 on the stride_3 axis, cell offset (0, -1, 0)
+   cell[_stride_pdfs_dst_3 - _stride_pdfs_dst_1] = 0.0;
+   // index 7 on the stride_3 axis, cell offset (+1, -1, 0)
+   cell[7*_stride_pdfs_dst_3 + _stride_pdfs_dst_0 - _stride_pdfs_dst_1] = 0.0;
+   // index 8 on the stride_3 axis, cell offset (-1, -1, 0)
+   cell[8*_stride_pdfs_dst_3 - _stride_pdfs_dst_0 - _stride_pdfs_dst_1] = 0.0;
+   // index 11 on the stride_3 axis, cell offset (0, -1, -1)
+   cell[11*_stride_pdfs_dst_3 - _stride_pdfs_dst_1 - _stride_pdfs_dst_2] = 0.0;
+   // index 15 on the stride_3 axis, cell offset (0, -1, +1)
+   cell[15*_stride_pdfs_dst_3 - _stride_pdfs_dst_1 + _stride_pdfs_dst_2] = 0.0;
+}
+}
+
+namespace internal_storagespecification_zeroCoalescenceRegion_TN {
+// Writes 0.0 to one pdfs_dst entry per in-range cell (x, y, z); each thread handles one cell.
+static FUNC_PREFIX void storagespecification_zeroCoalescenceRegion_TN(double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   double * const cell = _data_pdfs_dst + _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   // index 16 on the stride_3 axis, cell offset (0, +1, +1)
+   cell[16*_stride_pdfs_dst_3 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2] = 0.0;
+}
+}
+
+namespace internal_storagespecification_zeroCoalescenceRegion_BS {
+// Writes 0.0 to one pdfs_dst entry per in-range cell (x, y, z); each thread handles one cell.
+static FUNC_PREFIX void storagespecification_zeroCoalescenceRegion_BS(double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   double * const cell = _data_pdfs_dst + _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+   // index 11 on the stride_3 axis, cell offset (0, -1, -1)
+   cell[11*_stride_pdfs_dst_3 - _stride_pdfs_dst_1 - _stride_pdfs_dst_2] = 0.0;
+}
+}
+
+namespace internal_storagespecification_unpackCoalescence_TE {
+// Per in-range cell (x, y, z): adds 0.125 * buffer value onto pdfs_dst index 17 (stride_3 axis) at cell offset (+1, 0, +1).
+static FUNC_PREFIX void storagespecification_unpackCoalescence_TE(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   const int64_t packedIdx = _size_pdfs_dst_0*_size_pdfs_dst_1*z + _size_pdfs_dst_0*y + x;
+   const int64_t dstIdx = _stride_pdfs_dst_0*(x + 1) + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*(z + 1) + 17*_stride_pdfs_dst_3;
+   _data_pdfs_dst[dstIdx] = 0.125*_data_buffer[packedIdx] + _data_pdfs_dst[dstIdx];
+}
+}
+
+namespace internal_storagespecification_unpackCoalescence_NE {
+// Per in-range cell (x, y, z): adds 0.125 * buffer value onto pdfs_dst index 9 (stride_3 axis) at cell offset (+1, +1, 0).
+static FUNC_PREFIX void storagespecification_unpackCoalescence_NE(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   const int64_t packedIdx = _size_pdfs_dst_0*_size_pdfs_dst_1*z + _size_pdfs_dst_0*y + x;
+   const int64_t dstIdx = _stride_pdfs_dst_0*(x + 1) + _stride_pdfs_dst_1*(y + 1) + _stride_pdfs_dst_2*z + 9*_stride_pdfs_dst_3;
+   _data_pdfs_dst[dstIdx] = 0.125*_data_buffer[packedIdx] + _data_pdfs_dst[dstIdx];
+}
+}
+
+namespace internal_storagespecification_unpackCoalescence_NW {
+// Per in-range cell (x, y, z): adds 0.125 * buffer value onto pdfs_dst index 10 (stride_3 axis) at cell offset (-1, +1, 0).
+static FUNC_PREFIX void storagespecification_unpackCoalescence_NW(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   const int64_t packedIdx = _size_pdfs_dst_0*_size_pdfs_dst_1*z + _size_pdfs_dst_0*y + x;
+   const int64_t dstIdx = _stride_pdfs_dst_0*(x - 1) + _stride_pdfs_dst_1*(y + 1) + _stride_pdfs_dst_2*z + 10*_stride_pdfs_dst_3;
+   _data_pdfs_dst[dstIdx] = 0.125*_data_buffer[packedIdx] + _data_pdfs_dst[dstIdx];
+}
+}
+
+namespace internal_storagespecification_unpackCoalescence_BW {
+// Per in-range cell (x, y, z): adds 0.125 * buffer value onto pdfs_dst index 14 (stride_3 axis) at cell offset (-1, 0, -1).
+static FUNC_PREFIX void storagespecification_unpackCoalescence_BW(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   const int64_t packedIdx = _size_pdfs_dst_0*_size_pdfs_dst_1*z + _size_pdfs_dst_0*y + x;
+   const int64_t dstIdx = _stride_pdfs_dst_0*(x - 1) + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*(z - 1) + 14*_stride_pdfs_dst_3;
+   _data_pdfs_dst[dstIdx] = 0.125*_data_buffer[packedIdx] + _data_pdfs_dst[dstIdx];
+}
+}
+
+namespace internal_storagespecification_unpackCoalescence_TS {
+// Per in-range cell (x, y, z): adds 0.125 * buffer value onto pdfs_dst index 15 (stride_3 axis) at cell offset (0, -1, +1).
+static FUNC_PREFIX void storagespecification_unpackCoalescence_TS(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   const int64_t packedIdx = _size_pdfs_dst_0*_size_pdfs_dst_1*z + _size_pdfs_dst_0*y + x;
+   const int64_t dstIdx = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*(y - 1) + _stride_pdfs_dst_2*(z + 1) + 15*_stride_pdfs_dst_3;
+   _data_pdfs_dst[dstIdx] = 0.125*_data_buffer[packedIdx] + _data_pdfs_dst[dstIdx];
+}
+}
+
+namespace internal_storagespecification_unpackCoalescence_SW {
+// Per in-range cell (x, y, z): adds 0.125 * buffer value onto pdfs_dst index 8 (stride_3 axis) at cell offset (-1, -1, 0).
+static FUNC_PREFIX void storagespecification_unpackCoalescence_SW(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   const int64_t packedIdx = _size_pdfs_dst_0*_size_pdfs_dst_1*z + _size_pdfs_dst_0*y + x;
+   const int64_t dstIdx = _stride_pdfs_dst_0*(x - 1) + _stride_pdfs_dst_1*(y - 1) + _stride_pdfs_dst_2*z + 8*_stride_pdfs_dst_3;
+   _data_pdfs_dst[dstIdx] = 0.125*_data_buffer[packedIdx] + _data_pdfs_dst[dstIdx];
+}
+}
+
+namespace internal_storagespecification_unpackCoalescence_BS {
+// Per in-range cell (x, y, z): adds 0.125 * buffer value onto pdfs_dst index 11 (stride_3 axis) at cell offset (0, -1, -1).
+static FUNC_PREFIX void storagespecification_unpackCoalescence_BS(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t x = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t y = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t z = blockDim.z*blockIdx.z + threadIdx.z;
+   if (x >= _size_pdfs_dst_0 || y >= _size_pdfs_dst_1 || z >= _size_pdfs_dst_2) { return; }
+   const int64_t packedIdx = _size_pdfs_dst_0*_size_pdfs_dst_1*z + _size_pdfs_dst_0*y + x;
+   const int64_t dstIdx = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*(y - 1) + _stride_pdfs_dst_2*(z - 1) + 11*_stride_pdfs_dst_3;
+   _data_pdfs_dst[dstIdx] = 0.125*_data_buffer[packedIdx] + _data_pdfs_dst[dstIdx];
+}
+}
+
+namespace internal_storagespecification_unpackCoalescence_E { // dedicated namespace for the "E" coalescence-unpack kernel
+static FUNC_PREFIX void storagespecification_unpackCoalescence_E(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) // adds 0.125 * five buffered values per cell into fourth-indices 3, 7, 9, 13, 17 of _data_pdfs_dst; _size_* bound the thread range, _stride_* are element strides of the destination
+{
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_dst_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_dst_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_dst_2) // bounds guard: threads outside the region do nothing
+   {
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index in x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index in y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index in z
+      double * RESTRICT  _data_pdfs_dst_10_20_33 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3; // base for (ctr_1, ctr_2), fourth index 3
+      _data_pdfs_dst_10_20_33[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = 0.125*_data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0] + _data_pdfs_dst_10_20_33[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0]; // buffer slot 0 of 5 -> cell ctr_0+1; buffer stores five consecutive values per cell. NOTE(review): 0.125 looks like a 1/8 averaging weight - confirm against the generator
+      double * RESTRICT  _data_pdfs_dst_1m1_20_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 - _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3; // base for (ctr_1-1, ctr_2), fourth index 7
+      _data_pdfs_dst_1m1_20_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = 0.125*_data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 1] + _data_pdfs_dst_1m1_20_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0]; // buffer slot 1 -> cell ctr_0+1
+      double * RESTRICT  _data_pdfs_dst_11_20_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3; // base for (ctr_1+1, ctr_2), fourth index 9
+      _data_pdfs_dst_11_20_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = 0.125*_data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 2] + _data_pdfs_dst_11_20_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0]; // buffer slot 2 -> cell ctr_0+1
+      double * RESTRICT  _data_pdfs_dst_10_2m1_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 - _stride_pdfs_dst_2 + 13*_stride_pdfs_dst_3; // base for (ctr_1, ctr_2-1), fourth index 13
+      _data_pdfs_dst_10_2m1_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = 0.125*_data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 3] + _data_pdfs_dst_10_2m1_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0]; // buffer slot 3 -> cell ctr_0+1
+      double * RESTRICT  _data_pdfs_dst_10_21_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 17*_stride_pdfs_dst_3; // base for (ctr_1, ctr_2+1), fourth index 17
+      _data_pdfs_dst_10_21_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = 0.125*_data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 4] + _data_pdfs_dst_10_21_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0]; // buffer slot 4 -> cell ctr_0+1
+   } 
+}
+}
+
+namespace internal_storagespecification_unpackCoalescence_BN { // dedicated namespace for the "BN" coalescence-unpack kernel
+static FUNC_PREFIX void storagespecification_unpackCoalescence_BN(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) // adds 0.125 * one buffered value per cell into fourth-index 12 of _data_pdfs_dst; _size_* bound the thread range, _stride_* are element strides of the destination
+{
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_dst_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_dst_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_dst_2) // bounds guard: threads outside the region do nothing
+   {
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index in x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index in y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index in z
+      double * RESTRICT  _data_pdfs_dst_11_2m1_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 - _stride_pdfs_dst_2 + 12*_stride_pdfs_dst_3; // base for (ctr_1+1, ctr_2-1), fourth index 12
+      _data_pdfs_dst_11_2m1_312[_stride_pdfs_dst_0*ctr_0] = 0.125*_data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0] + _data_pdfs_dst_11_2m1_312[_stride_pdfs_dst_0*ctr_0]; // accumulate into cell ctr_0; buffer holds one value per cell with ctr_0 fastest. NOTE(review): 0.125 looks like a 1/8 averaging weight - confirm against the generator
+   } 
+}
+}
+
+namespace internal_storagespecification_unpackCoalescence_B { // dedicated namespace for the "B" coalescence-unpack kernel
+static FUNC_PREFIX void storagespecification_unpackCoalescence_B(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) // adds 0.125 * five buffered values per cell into fourth-indices 5, 11, 12, 13, 14 of _data_pdfs_dst; _size_* bound the thread range, _stride_* are element strides of the destination
+{
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_dst_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_dst_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_dst_2) // bounds guard: threads outside the region do nothing
+   {
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index in x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index in y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index in z
+      double * RESTRICT  _data_pdfs_dst_10_2m1_35 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 - _stride_pdfs_dst_2 + 5*_stride_pdfs_dst_3; // base for (ctr_1, ctr_2-1), fourth index 5
+      _data_pdfs_dst_10_2m1_35[_stride_pdfs_dst_0*ctr_0] = 0.125*_data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0] + _data_pdfs_dst_10_2m1_35[_stride_pdfs_dst_0*ctr_0]; // buffer slot 0 of 5 -> cell ctr_0; buffer stores five consecutive values per cell. NOTE(review): 0.125 looks like a 1/8 averaging weight - confirm against the generator
+      double * RESTRICT  _data_pdfs_dst_1m1_2m1_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 - _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 - _stride_pdfs_dst_2 + 11*_stride_pdfs_dst_3; // base for (ctr_1-1, ctr_2-1), fourth index 11
+      _data_pdfs_dst_1m1_2m1_311[_stride_pdfs_dst_0*ctr_0] = 0.125*_data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 1] + _data_pdfs_dst_1m1_2m1_311[_stride_pdfs_dst_0*ctr_0]; // buffer slot 1 -> cell ctr_0
+      double * RESTRICT  _data_pdfs_dst_11_2m1_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 - _stride_pdfs_dst_2 + 12*_stride_pdfs_dst_3; // base for (ctr_1+1, ctr_2-1), fourth index 12
+      _data_pdfs_dst_11_2m1_312[_stride_pdfs_dst_0*ctr_0] = 0.125*_data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 2] + _data_pdfs_dst_11_2m1_312[_stride_pdfs_dst_0*ctr_0]; // buffer slot 2 -> cell ctr_0
+      double * RESTRICT  _data_pdfs_dst_10_2m1_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 - _stride_pdfs_dst_2 + 13*_stride_pdfs_dst_3; // base for (ctr_1, ctr_2-1), fourth index 13
+      _data_pdfs_dst_10_2m1_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = 0.125*_data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 3] + _data_pdfs_dst_10_2m1_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0]; // buffer slot 3 -> cell ctr_0+1
+      double * RESTRICT  _data_pdfs_dst_10_2m1_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 - _stride_pdfs_dst_2 + 14*_stride_pdfs_dst_3; // base for (ctr_1, ctr_2-1), fourth index 14
+      _data_pdfs_dst_10_2m1_314[_stride_pdfs_dst_0*ctr_0 - _stride_pdfs_dst_0] = 0.125*_data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 4] + _data_pdfs_dst_10_2m1_314[_stride_pdfs_dst_0*ctr_0 - _stride_pdfs_dst_0]; // buffer slot 4 -> cell ctr_0-1
+   } 
+}
+}
+
+namespace internal_storagespecification_unpackCoalescence_W { // dedicated namespace for the "W" coalescence-unpack kernel
+static FUNC_PREFIX void storagespecification_unpackCoalescence_W(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) // adds 0.125 * five buffered values per cell into fourth-indices 4, 8, 10, 14, 18 of _data_pdfs_dst; _size_* bound the thread range, _stride_* are element strides of the destination
+{
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_dst_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_dst_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_dst_2) // bounds guard: threads outside the region do nothing
+   {
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index in x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index in y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index in z
+      double * RESTRICT  _data_pdfs_dst_10_20_34 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3; // base for (ctr_1, ctr_2), fourth index 4
+      _data_pdfs_dst_10_20_34[_stride_pdfs_dst_0*ctr_0 - _stride_pdfs_dst_0] = 0.125*_data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0] + _data_pdfs_dst_10_20_34[_stride_pdfs_dst_0*ctr_0 - _stride_pdfs_dst_0]; // buffer slot 0 of 5 -> cell ctr_0-1; buffer stores five consecutive values per cell. NOTE(review): 0.125 looks like a 1/8 averaging weight - confirm against the generator
+      double * RESTRICT  _data_pdfs_dst_1m1_20_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 - _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3; // base for (ctr_1-1, ctr_2), fourth index 8
+      _data_pdfs_dst_1m1_20_38[_stride_pdfs_dst_0*ctr_0 - _stride_pdfs_dst_0] = 0.125*_data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 1] + _data_pdfs_dst_1m1_20_38[_stride_pdfs_dst_0*ctr_0 - _stride_pdfs_dst_0]; // buffer slot 1 -> cell ctr_0-1
+      double * RESTRICT  _data_pdfs_dst_11_20_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3; // base for (ctr_1+1, ctr_2), fourth index 10
+      _data_pdfs_dst_11_20_310[_stride_pdfs_dst_0*ctr_0 - _stride_pdfs_dst_0] = 0.125*_data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 2] + _data_pdfs_dst_11_20_310[_stride_pdfs_dst_0*ctr_0 - _stride_pdfs_dst_0]; // buffer slot 2 -> cell ctr_0-1
+      double * RESTRICT  _data_pdfs_dst_10_2m1_314 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 - _stride_pdfs_dst_2 + 14*_stride_pdfs_dst_3; // base for (ctr_1, ctr_2-1), fourth index 14
+      _data_pdfs_dst_10_2m1_314[_stride_pdfs_dst_0*ctr_0 - _stride_pdfs_dst_0] = 0.125*_data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 3] + _data_pdfs_dst_10_2m1_314[_stride_pdfs_dst_0*ctr_0 - _stride_pdfs_dst_0]; // buffer slot 3 -> cell ctr_0-1
+      double * RESTRICT  _data_pdfs_dst_10_21_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 18*_stride_pdfs_dst_3; // base for (ctr_1, ctr_2+1), fourth index 18
+      _data_pdfs_dst_10_21_318[_stride_pdfs_dst_0*ctr_0 - _stride_pdfs_dst_0] = 0.125*_data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 4] + _data_pdfs_dst_10_21_318[_stride_pdfs_dst_0*ctr_0 - _stride_pdfs_dst_0]; // buffer slot 4 -> cell ctr_0-1
+   } 
+}
+}
+
+namespace internal_storagespecification_unpackCoalescence_TW { // dedicated namespace for the "TW" coalescence-unpack kernel
+static FUNC_PREFIX void storagespecification_unpackCoalescence_TW(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) // adds 0.125 * one buffered value per cell into fourth-index 18 of _data_pdfs_dst; _size_* bound the thread range, _stride_* are element strides of the destination
+{
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_dst_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_dst_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_dst_2) // bounds guard: threads outside the region do nothing
+   {
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index in x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index in y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index in z
+      double * RESTRICT  _data_pdfs_dst_10_21_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 18*_stride_pdfs_dst_3; // base for (ctr_1, ctr_2+1), fourth index 18
+      _data_pdfs_dst_10_21_318[_stride_pdfs_dst_0*ctr_0 - _stride_pdfs_dst_0] = 0.125*_data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0] + _data_pdfs_dst_10_21_318[_stride_pdfs_dst_0*ctr_0 - _stride_pdfs_dst_0]; // accumulate into cell ctr_0-1; buffer holds one value per cell with ctr_0 fastest. NOTE(review): 0.125 looks like a 1/8 averaging weight - confirm against the generator
+   } 
+}
+}
+
+namespace internal_storagespecification_unpackCoalescence_SE { // dedicated namespace for the "SE" coalescence-unpack kernel
+static FUNC_PREFIX void storagespecification_unpackCoalescence_SE(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) // adds 0.125 * one buffered value per cell into fourth-index 7 of _data_pdfs_dst; _size_* bound the thread range, _stride_* are element strides of the destination
+{
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_dst_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_dst_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_dst_2) // bounds guard: threads outside the region do nothing
+   {
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index in x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index in y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index in z
+      double * RESTRICT  _data_pdfs_dst_1m1_20_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 - _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3; // base for (ctr_1-1, ctr_2), fourth index 7
+      _data_pdfs_dst_1m1_20_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = 0.125*_data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0] + _data_pdfs_dst_1m1_20_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0]; // accumulate into cell ctr_0+1; buffer holds one value per cell with ctr_0 fastest. NOTE(review): 0.125 looks like a 1/8 averaging weight - confirm against the generator
+   } 
+}
+}
+
+namespace internal_storagespecification_unpackCoalescence_S { // dedicated namespace for the "S" coalescence-unpack kernel
+static FUNC_PREFIX void storagespecification_unpackCoalescence_S(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) // adds 0.125 * five buffered values per cell into fourth-indices 1, 7, 8, 11, 15 of _data_pdfs_dst; _size_* bound the thread range, _stride_* are element strides of the destination
+{
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_dst_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_dst_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_dst_2) // bounds guard: threads outside the region do nothing
+   {
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index in x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index in y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index in z
+      double * RESTRICT  _data_pdfs_dst_1m1_20_31 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 - _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3; // base for (ctr_1-1, ctr_2), fourth index 1
+      _data_pdfs_dst_1m1_20_31[_stride_pdfs_dst_0*ctr_0] = 0.125*_data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0] + _data_pdfs_dst_1m1_20_31[_stride_pdfs_dst_0*ctr_0]; // buffer slot 0 of 5 -> cell ctr_0; buffer stores five consecutive values per cell. NOTE(review): 0.125 looks like a 1/8 averaging weight - confirm against the generator
+      double * RESTRICT  _data_pdfs_dst_1m1_20_37 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 - _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3; // base for (ctr_1-1, ctr_2), fourth index 7
+      _data_pdfs_dst_1m1_20_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = 0.125*_data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 1] + _data_pdfs_dst_1m1_20_37[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0]; // buffer slot 1 -> cell ctr_0+1
+      double * RESTRICT  _data_pdfs_dst_1m1_20_38 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 - _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3; // base for (ctr_1-1, ctr_2), fourth index 8
+      _data_pdfs_dst_1m1_20_38[_stride_pdfs_dst_0*ctr_0 - _stride_pdfs_dst_0] = 0.125*_data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 2] + _data_pdfs_dst_1m1_20_38[_stride_pdfs_dst_0*ctr_0 - _stride_pdfs_dst_0]; // buffer slot 2 -> cell ctr_0-1
+      double * RESTRICT  _data_pdfs_dst_1m1_2m1_311 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 - _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 - _stride_pdfs_dst_2 + 11*_stride_pdfs_dst_3; // base for (ctr_1-1, ctr_2-1), fourth index 11
+      _data_pdfs_dst_1m1_2m1_311[_stride_pdfs_dst_0*ctr_0] = 0.125*_data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 3] + _data_pdfs_dst_1m1_2m1_311[_stride_pdfs_dst_0*ctr_0]; // buffer slot 3 -> cell ctr_0
+      double * RESTRICT  _data_pdfs_dst_1m1_21_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 - _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 15*_stride_pdfs_dst_3; // base for (ctr_1-1, ctr_2+1), fourth index 15
+      _data_pdfs_dst_1m1_21_315[_stride_pdfs_dst_0*ctr_0] = 0.125*_data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 4] + _data_pdfs_dst_1m1_21_315[_stride_pdfs_dst_0*ctr_0]; // buffer slot 4 -> cell ctr_0
+   } 
+}
+}
+
+namespace internal_storagespecification_unpackCoalescence_N { // dedicated namespace for the "N" coalescence-unpack kernel
+static FUNC_PREFIX void storagespecification_unpackCoalescence_N(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) // adds 0.125 * five buffered values per cell into fourth-indices 2, 9, 10, 12, 16 of _data_pdfs_dst; _size_* bound the thread range, _stride_* are element strides of the destination
+{
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_dst_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_dst_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_dst_2) // bounds guard: threads outside the region do nothing
+   {
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index in x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index in y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index in z
+      double * RESTRICT  _data_pdfs_dst_11_20_32 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3; // base for (ctr_1+1, ctr_2), fourth index 2
+      _data_pdfs_dst_11_20_32[_stride_pdfs_dst_0*ctr_0] = 0.125*_data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0] + _data_pdfs_dst_11_20_32[_stride_pdfs_dst_0*ctr_0]; // buffer slot 0 of 5 -> cell ctr_0; buffer stores five consecutive values per cell. NOTE(review): 0.125 looks like a 1/8 averaging weight - confirm against the generator
+      double * RESTRICT  _data_pdfs_dst_11_20_39 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3; // base for (ctr_1+1, ctr_2), fourth index 9
+      _data_pdfs_dst_11_20_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = 0.125*_data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 1] + _data_pdfs_dst_11_20_39[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0]; // buffer slot 1 -> cell ctr_0+1
+      double * RESTRICT  _data_pdfs_dst_11_20_310 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3; // base for (ctr_1+1, ctr_2), fourth index 10
+      _data_pdfs_dst_11_20_310[_stride_pdfs_dst_0*ctr_0 - _stride_pdfs_dst_0] = 0.125*_data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 2] + _data_pdfs_dst_11_20_310[_stride_pdfs_dst_0*ctr_0 - _stride_pdfs_dst_0]; // buffer slot 2 -> cell ctr_0-1
+      double * RESTRICT  _data_pdfs_dst_11_2m1_312 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 - _stride_pdfs_dst_2 + 12*_stride_pdfs_dst_3; // base for (ctr_1+1, ctr_2-1), fourth index 12
+      _data_pdfs_dst_11_2m1_312[_stride_pdfs_dst_0*ctr_0] = 0.125*_data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 3] + _data_pdfs_dst_11_2m1_312[_stride_pdfs_dst_0*ctr_0]; // buffer slot 3 -> cell ctr_0
+      double * RESTRICT  _data_pdfs_dst_11_21_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 16*_stride_pdfs_dst_3; // base for (ctr_1+1, ctr_2+1), fourth index 16
+      _data_pdfs_dst_11_21_316[_stride_pdfs_dst_0*ctr_0] = 0.125*_data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 4] + _data_pdfs_dst_11_21_316[_stride_pdfs_dst_0*ctr_0]; // buffer slot 4 -> cell ctr_0
+   } 
+}
+}
+
+namespace internal_storagespecification_unpackCoalescence_T { // dedicated namespace for the "T" coalescence-unpack kernel
+static FUNC_PREFIX void storagespecification_unpackCoalescence_T(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) // adds 0.125 * five buffered values per cell into fourth-indices 6, 15, 16, 17, 18 of _data_pdfs_dst; _size_* bound the thread range, _stride_* are element strides of the destination
+{
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_dst_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_dst_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_dst_2) // bounds guard: threads outside the region do nothing
+   {
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index in x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index in y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index in z
+      double * RESTRICT  _data_pdfs_dst_10_21_36 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 6*_stride_pdfs_dst_3; // base for (ctr_1, ctr_2+1), fourth index 6
+      _data_pdfs_dst_10_21_36[_stride_pdfs_dst_0*ctr_0] = 0.125*_data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0] + _data_pdfs_dst_10_21_36[_stride_pdfs_dst_0*ctr_0]; // buffer slot 0 of 5 -> cell ctr_0; buffer stores five consecutive values per cell. NOTE(review): 0.125 looks like a 1/8 averaging weight - confirm against the generator
+      double * RESTRICT  _data_pdfs_dst_1m1_21_315 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 - _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 15*_stride_pdfs_dst_3; // base for (ctr_1-1, ctr_2+1), fourth index 15
+      _data_pdfs_dst_1m1_21_315[_stride_pdfs_dst_0*ctr_0] = 0.125*_data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 1] + _data_pdfs_dst_1m1_21_315[_stride_pdfs_dst_0*ctr_0]; // buffer slot 1 -> cell ctr_0
+      double * RESTRICT  _data_pdfs_dst_11_21_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 16*_stride_pdfs_dst_3; // base for (ctr_1+1, ctr_2+1), fourth index 16
+      _data_pdfs_dst_11_21_316[_stride_pdfs_dst_0*ctr_0] = 0.125*_data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 2] + _data_pdfs_dst_11_21_316[_stride_pdfs_dst_0*ctr_0]; // buffer slot 2 -> cell ctr_0
+      double * RESTRICT  _data_pdfs_dst_10_21_317 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 17*_stride_pdfs_dst_3; // base for (ctr_1, ctr_2+1), fourth index 17
+      _data_pdfs_dst_10_21_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = 0.125*_data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 3] + _data_pdfs_dst_10_21_317[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0]; // buffer slot 3 -> cell ctr_0+1
+      double * RESTRICT  _data_pdfs_dst_10_21_318 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 18*_stride_pdfs_dst_3; // base for (ctr_1, ctr_2+1), fourth index 18
+      _data_pdfs_dst_10_21_318[_stride_pdfs_dst_0*ctr_0 - _stride_pdfs_dst_0] = 0.125*_data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 4] + _data_pdfs_dst_10_21_318[_stride_pdfs_dst_0*ctr_0 - _stride_pdfs_dst_0]; // buffer slot 4 -> cell ctr_0-1
+   } 
+}
+}
+
+namespace internal_storagespecification_unpackCoalescence_BE { // dedicated namespace for the "BE" coalescence-unpack kernel
+static FUNC_PREFIX void storagespecification_unpackCoalescence_BE(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) // adds 0.125 * one buffered value per cell into fourth-index 13 of _data_pdfs_dst; _size_* bound the thread range, _stride_* are element strides of the destination
+{
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_dst_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_dst_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_dst_2) // bounds guard: threads outside the region do nothing
+   {
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index in x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index in y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index in z
+      double * RESTRICT  _data_pdfs_dst_10_2m1_313 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 - _stride_pdfs_dst_2 + 13*_stride_pdfs_dst_3; // base for (ctr_1, ctr_2-1), fourth index 13
+      _data_pdfs_dst_10_2m1_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0] = 0.125*_data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0] + _data_pdfs_dst_10_2m1_313[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_0]; // accumulate into cell ctr_0+1; buffer holds one value per cell with ctr_0 fastest. NOTE(review): 0.125 looks like a 1/8 averaging weight - confirm against the generator
+   } 
+}
+}
+
+namespace internal_storagespecification_unpackCoalescence_TN { // dedicated namespace for the "TN" coalescence-unpack kernel
+static FUNC_PREFIX void storagespecification_unpackCoalescence_TN(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) // adds 0.125 * one buffered value per cell into fourth-index 16 of _data_pdfs_dst; _size_* bound the thread range, _stride_* are element strides of the destination
+{
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_dst_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_dst_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_dst_2) // bounds guard: threads outside the region do nothing
+   {
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index in x
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index in y
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index in z
+      double * RESTRICT  _data_pdfs_dst_11_21_316 = _data_pdfs_dst + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_2 + 16*_stride_pdfs_dst_3; // base for (ctr_1+1, ctr_2+1), fourth index 16
+      _data_pdfs_dst_11_21_316[_stride_pdfs_dst_0*ctr_0] = 0.125*_data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0] + _data_pdfs_dst_11_21_316[_stride_pdfs_dst_0*ctr_0]; // accumulate into cell ctr_0; buffer holds one value per cell with ctr_0 fastest. NOTE(review): 0.125 looks like a 1/8 averaging weight - confirm against the generator
+   } 
+}
+}
+
+
+/*************************************************************************************
+ *                                 Kernel Wrappers
+*************************************************************************************/
+
+namespace walberla {
+namespace lbm {
+
+   void StorageSpecification::PackKernels::packAll(PdfField_T * pdfs_src, CellInterval & ci, unsigned char * outBuffer, gpuStream_t stream) const // host wrapper: launches storagespecification_pack_ALL over cell interval ci of pdfs_src, with outBuffer viewed as double*, on the given stream
+   {
+      double * buffer = reinterpret_cast<double*>(outBuffer); // byte buffer reinterpreted as doubles; NOTE(review): assumes outBuffer is suitably aligned for double - confirm with caller
+      double * RESTRICT  _data_buffer = buffer;
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs_src->nrOfGhostLayers())) // interval must not start below the ghost-layer range in x
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs_src->nrOfGhostLayers())) // same check in y
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs_src->nrOfGhostLayers())) // same check in z
+      double * RESTRICT const _data_pdfs_src = pdfs_src->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // pointer to the interval's minimum cell, fourth index 0
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_src->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // interval extent must fit inside the field including ghost layers
+      const int64_t _size_pdfs_src_0 = int64_t(int64_c(ci.xSize()) + 0); // kernel extent in x = interval x size
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_src->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+      const int64_t _size_pdfs_src_1 = int64_t(int64_c(ci.ySize()) + 0); // kernel extent in y = interval y size
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_src->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+      const int64_t _size_pdfs_src_2 = int64_t(int64_c(ci.zSize()) + 0); // kernel extent in z = interval z size
+      const int64_t _stride_pdfs_src_0 = int64_t(pdfs_src->xStride()); // strides are taken from the field, not from the interval
+      const int64_t _stride_pdfs_src_1 = int64_t(pdfs_src->yStride());
+      const int64_t _stride_pdfs_src_2 = int64_t(pdfs_src->zStride());
+      const int64_t _stride_pdfs_src_3 = int64_t(1 * int64_t(pdfs_src->fStride()));
+      dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)), uint32_c(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))))); // block = (1, min(16, ySize), min(64, zSize, 16*(16/min(16, ySize)))). NOTE(review): the inner division is by min(16, ySize), i.e. by zero when ci.ySize() == 0 - confirm callers never pass an empty interval
+      dim3 _grid(uint32_c(( (_size_pdfs_src_0) % (1) == 0 ? (int64_t)(_size_pdfs_src_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_src_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_src_1) % (((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) == 0 ? (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) : ( (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) ) +1 )), uint32_c(( (_size_pdfs_src_2) % (((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) == 0 ? (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) : ( (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) ) +1 ))); // grid = per-dimension ceiling division of the extent by the matching _block extent (the same block expressions are repeated inline)
+      internal_storagespecification_pack_ALL::storagespecification_pack_ALL<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3); // launch on `stream` with 0 bytes of dynamic shared memory; no launch-error check is made here
+   }
+
+
+   void StorageSpecification::PackKernels::unpackAll(PdfField_T * pdfs_dst, CellInterval & ci, unsigned char * inBuffer, gpuStream_t stream) const // host wrapper: launches storagespecification_unpack_ALL over cell interval ci of pdfs_dst, with inBuffer viewed as double*, on the given stream
+   {
+      double * buffer = reinterpret_cast<double*>(inBuffer); // byte buffer reinterpreted as doubles; NOTE(review): assumes inBuffer is suitably aligned for double - confirm with caller
+      double * RESTRICT const _data_buffer = buffer;
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs_dst->nrOfGhostLayers())) // interval must not start below the ghost-layer range in x
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs_dst->nrOfGhostLayers())) // same check in y
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs_dst->nrOfGhostLayers())) // same check in z
+      double * RESTRICT  _data_pdfs_dst = pdfs_dst->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // pointer to the interval's minimum cell, fourth index 0
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // interval extent must fit inside the field including ghost layers
+      const int64_t _size_pdfs_dst_0 = int64_t(int64_c(ci.xSize()) + 0); // kernel extent in x = interval x size
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+      const int64_t _size_pdfs_dst_1 = int64_t(int64_c(ci.ySize()) + 0); // kernel extent in y = interval y size
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+      const int64_t _size_pdfs_dst_2 = int64_t(int64_c(ci.zSize()) + 0); // kernel extent in z = interval z size
+      const int64_t _stride_pdfs_dst_0 = int64_t(pdfs_dst->xStride()); // strides are taken from the field, not from the interval
+      const int64_t _stride_pdfs_dst_1 = int64_t(pdfs_dst->yStride());
+      const int64_t _stride_pdfs_dst_2 = int64_t(pdfs_dst->zStride());
+      const int64_t _stride_pdfs_dst_3 = int64_t(1 * int64_t(pdfs_dst->fStride()));
+      dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))))); // block = (1, min(16, ySize), min(64, zSize, 16*(16/min(16, ySize)))). NOTE(review): the inner division is by min(16, ySize), i.e. by zero when ci.ySize() == 0 - confirm callers never pass an empty interval
+      dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 ))); // grid = per-dimension ceiling division of the extent by the matching _block extent (the same block expressions are repeated inline)
+      internal_storagespecification_unpack_ALL::storagespecification_unpack_ALL<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3); // launch on `stream` with 0 bytes of dynamic shared memory; no launch-error check is made here
+   }
+
+
+   // Enqueues the localCopy_ALL kernel on `stream`. The kernel receives the data pointers at the minimum corners of
+   // dstInterval / srcInterval, the extent of dstInterval and the strides of both fields. An empty interval is a no-op.
+   void StorageSpecification::PackKernels::localCopyAll(PdfField_T * pdfs_src, CellInterval & srcInterval, PdfField_T * pdfs_dst, CellInterval & dstInterval, gpuStream_t stream) const
+   {
+      WALBERLA_ASSERT_EQUAL(srcInterval.xSize(), dstInterval.xSize())
+      WALBERLA_ASSERT_EQUAL(srcInterval.ySize(), dstInterval.ySize())
+      WALBERLA_ASSERT_EQUAL(srcInterval.zSize(), dstInterval.zSize())
+      WALBERLA_ASSERT_GREATER_EQUAL(dstInterval.xMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(dstInterval.yMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(dstInterval.zMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(srcInterval.xMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(srcInterval.yMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(srcInterval.zMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      const int64_t _size_pdfs_dst_0 = int64_c(dstInterval.xSize());
+      const int64_t _size_pdfs_dst_1 = int64_c(dstInterval.ySize());
+      const int64_t _size_pdfs_dst_2 = int64_c(dstInterval.zSize());
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->xSizeWithGhostLayer(), _size_pdfs_dst_0)
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->ySizeWithGhostLayer(), _size_pdfs_dst_1)
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->zSizeWithGhostLayer(), _size_pdfs_dst_2)
+      // Nothing to copy; this also keeps the divisions below and the launch configuration away from a zero extent.
+      if (_size_pdfs_dst_0 <= 0 || _size_pdfs_dst_1 <= 0 || _size_pdfs_dst_2 <= 0) return;
+      double * RESTRICT  _data_pdfs_dst = pdfs_dst->dataAt(dstInterval.xMin(), dstInterval.yMin(), dstInterval.zMin(), 0);
+      double * RESTRICT const _data_pdfs_src = pdfs_src->dataAt(srcInterval.xMin(), srcInterval.yMin(), srcInterval.zMin(), 0);
+      // Thread block is 1 x blockY x blockZ with blockY <= 16 and blockZ <= 64; blockY * blockZ never exceeds 256.
+      const int64_t blockY = (16 < _size_pdfs_dst_1) ? int64_t(16) : _size_pdfs_dst_1;
+      const int64_t zCap   = (16 * (int64_t(16) / blockY) < 64) ? 16 * (int64_t(16) / blockY) : int64_t(64);
+      const int64_t blockZ = (_size_pdfs_dst_2 < zCap) ? _size_pdfs_dst_2 : zCap;
+      dim3 _block(uint32_c(1), uint32_c(blockY), uint32_c(blockZ));
+      dim3 _grid(uint32_c(_size_pdfs_dst_0), uint32_c((_size_pdfs_dst_1 + blockY - 1) / blockY), uint32_c((_size_pdfs_dst_2 + blockZ - 1) / blockZ)); // extent / block, rounded up
+      internal_storagespecification_localCopy_ALL::storagespecification_localCopy_ALL<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2,
+         int64_t(pdfs_dst->xStride()), int64_t(pdfs_dst->yStride()), int64_t(pdfs_dst->zStride()), int64_t(pdfs_dst->fStride()), int64_t(pdfs_src->xStride()), int64_t(pdfs_src->yStride()), int64_t(pdfs_src->zStride()), int64_t(pdfs_src->fStride()));
+   }
+
+   void StorageSpecification::PackKernels::packDirection(PdfField_T * pdfs_src, CellInterval & ci, unsigned char * outBuffer, stencil::Direction dir, gpuStream_t stream) const
+   {
+      double * buffer = reinterpret_cast<double*>(outBuffer);
+      double * RESTRICT  _data_buffer = buffer;
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      double * RESTRICT const _data_pdfs_src = pdfs_src->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_src->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+      const int64_t _size_pdfs_src_0 = int64_t(int64_c(ci.xSize()) + 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_src->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+      const int64_t _size_pdfs_src_1 = int64_t(int64_c(ci.ySize()) + 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_src->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+      const int64_t _size_pdfs_src_2 = int64_t(int64_c(ci.zSize()) + 0);
+      const int64_t _stride_pdfs_src_0 = int64_t(pdfs_src->xStride());
+      const int64_t _stride_pdfs_src_1 = int64_t(pdfs_src->yStride());
+      const int64_t _stride_pdfs_src_2 = int64_t(pdfs_src->zStride());
+      const int64_t _stride_pdfs_src_3 = int64_t(1 * int64_t(pdfs_src->fStride()));
+      switch (dir) {
+          case stencil::N : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)), uint32_c(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_src_0) % (1) == 0 ? (int64_t)(_size_pdfs_src_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_src_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_src_1) % (((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) == 0 ? (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) : ( (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) ) +1 )), uint32_c(( (_size_pdfs_src_2) % (((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) == 0 ? (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) : ( (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) ) +1 )));
+              internal_storagespecification_pack_N::storagespecification_pack_N<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::S : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)), uint32_c(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_src_0) % (1) == 0 ? (int64_t)(_size_pdfs_src_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_src_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_src_1) % (((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) == 0 ? (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) : ( (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) ) +1 )), uint32_c(( (_size_pdfs_src_2) % (((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) == 0 ? (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) : ( (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) ) +1 )));
+              internal_storagespecification_pack_S::storagespecification_pack_S<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::W : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)), uint32_c(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_src_0) % (1) == 0 ? (int64_t)(_size_pdfs_src_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_src_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_src_1) % (((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) == 0 ? (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) : ( (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) ) +1 )), uint32_c(( (_size_pdfs_src_2) % (((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) == 0 ? (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) : ( (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) ) +1 )));
+              internal_storagespecification_pack_W::storagespecification_pack_W<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::E : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)), uint32_c(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_src_0) % (1) == 0 ? (int64_t)(_size_pdfs_src_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_src_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_src_1) % (((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) == 0 ? (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) : ( (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) ) +1 )), uint32_c(( (_size_pdfs_src_2) % (((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) == 0 ? (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) : ( (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) ) +1 )));
+              internal_storagespecification_pack_E::storagespecification_pack_E<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::T : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)), uint32_c(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_src_0) % (1) == 0 ? (int64_t)(_size_pdfs_src_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_src_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_src_1) % (((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) == 0 ? (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) : ( (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) ) +1 )), uint32_c(( (_size_pdfs_src_2) % (((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) == 0 ? (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) : ( (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) ) +1 )));
+              internal_storagespecification_pack_T::storagespecification_pack_T<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::B : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)), uint32_c(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_src_0) % (1) == 0 ? (int64_t)(_size_pdfs_src_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_src_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_src_1) % (((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) == 0 ? (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) : ( (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) ) +1 )), uint32_c(( (_size_pdfs_src_2) % (((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) == 0 ? (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) : ( (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) ) +1 )));
+              internal_storagespecification_pack_B::storagespecification_pack_B<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::NW : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)), uint32_c(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_src_0) % (1) == 0 ? (int64_t)(_size_pdfs_src_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_src_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_src_1) % (((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) == 0 ? (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) : ( (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) ) +1 )), uint32_c(( (_size_pdfs_src_2) % (((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) == 0 ? (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) : ( (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) ) +1 )));
+              internal_storagespecification_pack_NW::storagespecification_pack_NW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::NE : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)), uint32_c(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_src_0) % (1) == 0 ? (int64_t)(_size_pdfs_src_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_src_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_src_1) % (((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) == 0 ? (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) : ( (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) ) +1 )), uint32_c(( (_size_pdfs_src_2) % (((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) == 0 ? (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) : ( (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) ) +1 )));
+              internal_storagespecification_pack_NE::storagespecification_pack_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::SW : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)), uint32_c(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_src_0) % (1) == 0 ? (int64_t)(_size_pdfs_src_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_src_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_src_1) % (((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) == 0 ? (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) : ( (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) ) +1 )), uint32_c(( (_size_pdfs_src_2) % (((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) == 0 ? (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) : ( (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) ) +1 )));
+              internal_storagespecification_pack_SW::storagespecification_pack_SW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::SE : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)), uint32_c(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_src_0) % (1) == 0 ? (int64_t)(_size_pdfs_src_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_src_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_src_1) % (((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) == 0 ? (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) : ( (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) ) +1 )), uint32_c(( (_size_pdfs_src_2) % (((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) == 0 ? (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) : ( (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) ) +1 )));
+              internal_storagespecification_pack_SE::storagespecification_pack_SE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::TN : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)), uint32_c(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_src_0) % (1) == 0 ? (int64_t)(_size_pdfs_src_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_src_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_src_1) % (((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) == 0 ? (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) : ( (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) ) +1 )), uint32_c(( (_size_pdfs_src_2) % (((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) == 0 ? (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) : ( (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) ) +1 )));
+              internal_storagespecification_pack_TN::storagespecification_pack_TN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::TS : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)), uint32_c(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_src_0) % (1) == 0 ? (int64_t)(_size_pdfs_src_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_src_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_src_1) % (((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) == 0 ? (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) : ( (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) ) +1 )), uint32_c(( (_size_pdfs_src_2) % (((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) == 0 ? (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) : ( (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) ) +1 )));
+              internal_storagespecification_pack_TS::storagespecification_pack_TS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::TW : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)), uint32_c(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_src_0) % (1) == 0 ? (int64_t)(_size_pdfs_src_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_src_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_src_1) % (((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) == 0 ? (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) : ( (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) ) +1 )), uint32_c(( (_size_pdfs_src_2) % (((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) == 0 ? (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) : ( (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) ) +1 )));
+              internal_storagespecification_pack_TW::storagespecification_pack_TW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::TE : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)), uint32_c(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_src_0) % (1) == 0 ? (int64_t)(_size_pdfs_src_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_src_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_src_1) % (((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) == 0 ? (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) : ( (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) ) +1 )), uint32_c(( (_size_pdfs_src_2) % (((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) == 0 ? (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) : ( (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) ) +1 )));
+              internal_storagespecification_pack_TE::storagespecification_pack_TE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::BN : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)), uint32_c(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_src_0) % (1) == 0 ? (int64_t)(_size_pdfs_src_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_src_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_src_1) % (((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) == 0 ? (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) : ( (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) ) +1 )), uint32_c(( (_size_pdfs_src_2) % (((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) == 0 ? (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) : ( (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) ) +1 )));
+              internal_storagespecification_pack_BN::storagespecification_pack_BN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::BS : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)), uint32_c(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_src_0) % (1) == 0 ? (int64_t)(_size_pdfs_src_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_src_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_src_1) % (((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) == 0 ? (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) : ( (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) ) +1 )), uint32_c(( (_size_pdfs_src_2) % (((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) == 0 ? (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) : ( (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) ) +1 )));
+              internal_storagespecification_pack_BS::storagespecification_pack_BS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::BW : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)), uint32_c(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_src_0) % (1) == 0 ? (int64_t)(_size_pdfs_src_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_src_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_src_1) % (((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) == 0 ? (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) : ( (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) ) +1 )), uint32_c(( (_size_pdfs_src_2) % (((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) == 0 ? (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) : ( (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) ) +1 )));
+              internal_storagespecification_pack_BW::storagespecification_pack_BW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::BE : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)), uint32_c(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_src_0) % (1) == 0 ? (int64_t)(_size_pdfs_src_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_src_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_src_1) % (((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) == 0 ? (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) : ( (int64_t)(_size_pdfs_src_1) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)) ) +1 )), uint32_c(( (_size_pdfs_src_2) % (((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) == 0 ? (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) : ( (int64_t)(_size_pdfs_src_2) / (int64_t)(((64 < ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1))))) ? 64 : ((_size_pdfs_src_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))) ? _size_pdfs_src_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_src_1) ? 16 : _size_pdfs_src_1)))))) ) +1 )));
+              internal_storagespecification_pack_BE::storagespecification_pack_BE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }default: break; 
+      }
+   }
+
+   void StorageSpecification::PackKernels::unpackDirection(PdfField_T * pdfs_dst, CellInterval & ci, unsigned char * inBuffer, stencil::Direction dir, gpuStream_t stream) const
+   {
+      double * buffer = reinterpret_cast<double*>(inBuffer);
+      double * RESTRICT const _data_buffer = buffer;
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      double * RESTRICT  _data_pdfs_dst = pdfs_dst->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+      const int64_t _size_pdfs_dst_0 = int64_t(int64_c(ci.xSize()) + 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+      const int64_t _size_pdfs_dst_1 = int64_t(int64_c(ci.ySize()) + 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+      const int64_t _size_pdfs_dst_2 = int64_t(int64_c(ci.zSize()) + 0);
+      const int64_t _stride_pdfs_dst_0 = int64_t(pdfs_dst->xStride());
+      const int64_t _stride_pdfs_dst_1 = int64_t(pdfs_dst->yStride());
+      const int64_t _stride_pdfs_dst_2 = int64_t(pdfs_dst->zStride());
+      const int64_t _stride_pdfs_dst_3 = int64_t(1 * int64_t(pdfs_dst->fStride()));
+      switch (dir) {
+          case stencil::N : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpack_N::storagespecification_unpack_N<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::S : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpack_S::storagespecification_unpack_S<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::W : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpack_W::storagespecification_unpack_W<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::E : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpack_E::storagespecification_unpack_E<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::T : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpack_T::storagespecification_unpack_T<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::B : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpack_B::storagespecification_unpack_B<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::NW : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpack_NW::storagespecification_unpack_NW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::NE : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpack_NE::storagespecification_unpack_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::SW : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpack_SW::storagespecification_unpack_SW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::SE : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpack_SE::storagespecification_unpack_SE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::TN : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpack_TN::storagespecification_unpack_TN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::TS : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpack_TS::storagespecification_unpack_TS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::TW : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpack_TW::storagespecification_unpack_TW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::TE : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpack_TE::storagespecification_unpack_TE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::BN : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpack_BN::storagespecification_unpack_BN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::BS : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpack_BS::storagespecification_unpack_BS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::BW : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpack_BW::storagespecification_unpack_BW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::BE : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpack_BE::storagespecification_unpack_BE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }default: break; 
+      }
+   }
+
+   void StorageSpecification::PackKernels::localCopyDirection(PdfField_T * pdfs_src, CellInterval & srcInterval, PdfField_T * pdfs_dst, CellInterval & dstInterval, stencil::Direction dir, gpuStream_t stream) const
+   {
+      WALBERLA_ASSERT_EQUAL(srcInterval.xSize(), dstInterval.xSize())
+      WALBERLA_ASSERT_EQUAL(srcInterval.ySize(), dstInterval.ySize())
+      WALBERLA_ASSERT_EQUAL(srcInterval.zSize(), dstInterval.zSize())
+
+      WALBERLA_ASSERT_GREATER_EQUAL(dstInterval.xMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(dstInterval.yMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(dstInterval.zMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      double * RESTRICT  _data_pdfs_dst = pdfs_dst->dataAt(dstInterval.xMin(), dstInterval.yMin(), dstInterval.zMin(), 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(srcInterval.xMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(srcInterval.yMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(srcInterval.zMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      double * RESTRICT const _data_pdfs_src = pdfs_src->dataAt(srcInterval.xMin(), srcInterval.yMin(), srcInterval.zMin(), 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->xSizeWithGhostLayer(), int64_t(int64_c(dstInterval.xSize()) + 0))
+      const int64_t _size_pdfs_dst_0 = int64_t(int64_c(dstInterval.xSize()) + 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->ySizeWithGhostLayer(), int64_t(int64_c(dstInterval.ySize()) + 0))
+      const int64_t _size_pdfs_dst_1 = int64_t(int64_c(dstInterval.ySize()) + 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->zSizeWithGhostLayer(), int64_t(int64_c(dstInterval.zSize()) + 0))
+      const int64_t _size_pdfs_dst_2 = int64_t(int64_c(dstInterval.zSize()) + 0);
+      const int64_t _stride_pdfs_dst_0 = int64_t(pdfs_dst->xStride());
+      const int64_t _stride_pdfs_dst_1 = int64_t(pdfs_dst->yStride());
+      const int64_t _stride_pdfs_dst_2 = int64_t(pdfs_dst->zStride());
+      const int64_t _stride_pdfs_dst_3 = int64_t(1 * int64_t(pdfs_dst->fStride()));
+      const int64_t _stride_pdfs_src_0 = int64_t(pdfs_src->xStride());
+      const int64_t _stride_pdfs_src_1 = int64_t(pdfs_src->yStride());
+      const int64_t _stride_pdfs_src_2 = int64_t(pdfs_src->zStride());
+      const int64_t _stride_pdfs_src_3 = int64_t(1 * int64_t(pdfs_src->fStride()));
+      switch (dir) {
+          case stencil::N : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_localCopy_N::storagespecification_localCopy_N<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::S : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_localCopy_S::storagespecification_localCopy_S<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::W : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_localCopy_W::storagespecification_localCopy_W<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::E : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_localCopy_E::storagespecification_localCopy_E<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::T : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_localCopy_T::storagespecification_localCopy_T<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::B : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_localCopy_B::storagespecification_localCopy_B<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::NW : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_localCopy_NW::storagespecification_localCopy_NW<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::NE : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_localCopy_NE::storagespecification_localCopy_NE<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::SW : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_localCopy_SW::storagespecification_localCopy_SW<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::SE : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_localCopy_SE::storagespecification_localCopy_SE<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::TN : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_localCopy_TN::storagespecification_localCopy_TN<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::TS : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_localCopy_TS::storagespecification_localCopy_TS<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::TW : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_localCopy_TW::storagespecification_localCopy_TW<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::TE : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_localCopy_TE::storagespecification_localCopy_TE<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::BN : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_localCopy_BN::storagespecification_localCopy_BN<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::BS : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_localCopy_BS::storagespecification_localCopy_BS<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::BW : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_localCopy_BW::storagespecification_localCopy_BW<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::BE : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_localCopy_BE::storagespecification_localCopy_BE<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }default: break; 
+      }
+   }
+
+   void StorageSpecification::PackKernels::unpackRedistribute(PdfField_T * pdfs_dst, CellInterval & ci, unsigned char * inBuffer, stencil::Direction dir, gpuStream_t stream) const
+   {
+      double * buffer = reinterpret_cast<double*>(inBuffer);
+      double * RESTRICT const _data_buffer = buffer;
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      double * RESTRICT  _data_pdfs_dst = pdfs_dst->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+      const int64_t _size_pdfs_dst_0 = int64_t(int64_c(ci.xSize()) + 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+      const int64_t _size_pdfs_dst_1 = int64_t(int64_c(ci.ySize()) + 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+      const int64_t _size_pdfs_dst_2 = int64_t(int64_c(ci.zSize()) + 0);
+      const int64_t _stride_pdfs_dst_0 = int64_t(pdfs_dst->xStride());
+      const int64_t _stride_pdfs_dst_1 = int64_t(pdfs_dst->yStride());
+      const int64_t _stride_pdfs_dst_2 = int64_t(pdfs_dst->zStride());
+      const int64_t _stride_pdfs_dst_3 = int64_t(1 * int64_t(pdfs_dst->fStride()));
+      switch (dir) {
+          case stencil::N : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpackRedistribute_N::storagespecification_unpackRedistribute_N<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::S : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpackRedistribute_S::storagespecification_unpackRedistribute_S<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::W : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpackRedistribute_W::storagespecification_unpackRedistribute_W<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::E : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpackRedistribute_E::storagespecification_unpackRedistribute_E<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::T : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpackRedistribute_T::storagespecification_unpackRedistribute_T<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::B : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpackRedistribute_B::storagespecification_unpackRedistribute_B<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::NW : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpackRedistribute_NW::storagespecification_unpackRedistribute_NW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::NE : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpackRedistribute_NE::storagespecification_unpackRedistribute_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::SW : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpackRedistribute_SW::storagespecification_unpackRedistribute_SW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::SE : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpackRedistribute_SE::storagespecification_unpackRedistribute_SE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::TN : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpackRedistribute_TN::storagespecification_unpackRedistribute_TN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::TS : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpackRedistribute_TS::storagespecification_unpackRedistribute_TS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::TW : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpackRedistribute_TW::storagespecification_unpackRedistribute_TW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::TE : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpackRedistribute_TE::storagespecification_unpackRedistribute_TE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::BN : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpackRedistribute_BN::storagespecification_unpackRedistribute_BN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::BS : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpackRedistribute_BS::storagespecification_unpackRedistribute_BS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::BW : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpackRedistribute_BW::storagespecification_unpackRedistribute_BW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::BE : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpackRedistribute_BE::storagespecification_unpackRedistribute_BE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }default: break; 
+      }
+   }
+
+   void StorageSpecification::PackKernels::packPartialCoalescence(PdfField_T * pdfs_src, MaskField_T * mask, CellInterval & ci, unsigned char * outBuffer, stencil::Direction dir, gpuStream_t stream) const
+   {
+      double * buffer = reinterpret_cast<double*>(outBuffer);
+      double * RESTRICT  _data_buffer = buffer;
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(mask->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(mask->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(mask->nrOfGhostLayers()))
+      uint32_t * RESTRICT const _data_mask = mask->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      double * RESTRICT const _data_pdfs_src = pdfs_src->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(mask->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+      const int64_t _size_mask_0 = int64_t(int64_c(ci.xSize()) + 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(mask->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+      const int64_t _size_mask_1 = int64_t(int64_c(ci.ySize()) + 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(mask->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+      const int64_t _size_mask_2 = int64_t(int64_c(ci.zSize()) + 0);
+      const int64_t _stride_mask_0 = int64_t(mask->xStride());
+      const int64_t _stride_mask_1 = int64_t(mask->yStride());
+      const int64_t _stride_mask_2 = int64_t(mask->zStride());
+      const int64_t _stride_pdfs_src_0 = int64_t(pdfs_src->xStride());
+      const int64_t _stride_pdfs_src_1 = int64_t(pdfs_src->yStride());
+      const int64_t _stride_pdfs_src_2 = int64_t(pdfs_src->zStride());
+      const int64_t _stride_pdfs_src_3 = int64_t(1 * int64_t(pdfs_src->fStride()));
+      switch (dir) {
+          case stencil::N : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_mask_1) ? 16 : _size_mask_1)), uint32_c(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))));
+              dim3 _grid(uint32_c(( (_size_mask_0) % (1) == 0 ? (int64_t)(_size_mask_0) / (int64_t)(1) : ( (int64_t)(_size_mask_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_mask_1) % (((16 < _size_mask_1) ? 16 : _size_mask_1)) == 0 ? (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) : ( (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) ) +1 )), uint32_c(( (_size_mask_2) % (((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) == 0 ? (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) : ( (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) ) +1 )));
+              internal_storagespecification_packPartialCoalescence_N::storagespecification_packPartialCoalescence_N<<<_grid, _block, 0, stream>>>(_data_buffer, _data_mask, _data_pdfs_src, _size_mask_0, _size_mask_1, _size_mask_2, _stride_mask_0, _stride_mask_1, _stride_mask_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::S : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_mask_1) ? 16 : _size_mask_1)), uint32_c(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))));
+              dim3 _grid(uint32_c(( (_size_mask_0) % (1) == 0 ? (int64_t)(_size_mask_0) / (int64_t)(1) : ( (int64_t)(_size_mask_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_mask_1) % (((16 < _size_mask_1) ? 16 : _size_mask_1)) == 0 ? (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) : ( (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) ) +1 )), uint32_c(( (_size_mask_2) % (((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) == 0 ? (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) : ( (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) ) +1 )));
+              internal_storagespecification_packPartialCoalescence_S::storagespecification_packPartialCoalescence_S<<<_grid, _block, 0, stream>>>(_data_buffer, _data_mask, _data_pdfs_src, _size_mask_0, _size_mask_1, _size_mask_2, _stride_mask_0, _stride_mask_1, _stride_mask_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::W : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_mask_1) ? 16 : _size_mask_1)), uint32_c(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))));
+              dim3 _grid(uint32_c(( (_size_mask_0) % (1) == 0 ? (int64_t)(_size_mask_0) / (int64_t)(1) : ( (int64_t)(_size_mask_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_mask_1) % (((16 < _size_mask_1) ? 16 : _size_mask_1)) == 0 ? (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) : ( (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) ) +1 )), uint32_c(( (_size_mask_2) % (((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) == 0 ? (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) : ( (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) ) +1 )));
+              internal_storagespecification_packPartialCoalescence_W::storagespecification_packPartialCoalescence_W<<<_grid, _block, 0, stream>>>(_data_buffer, _data_mask, _data_pdfs_src, _size_mask_0, _size_mask_1, _size_mask_2, _stride_mask_0, _stride_mask_1, _stride_mask_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::E : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_mask_1) ? 16 : _size_mask_1)), uint32_c(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))));
+              dim3 _grid(uint32_c(( (_size_mask_0) % (1) == 0 ? (int64_t)(_size_mask_0) / (int64_t)(1) : ( (int64_t)(_size_mask_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_mask_1) % (((16 < _size_mask_1) ? 16 : _size_mask_1)) == 0 ? (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) : ( (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) ) +1 )), uint32_c(( (_size_mask_2) % (((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) == 0 ? (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) : ( (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) ) +1 )));
+              internal_storagespecification_packPartialCoalescence_E::storagespecification_packPartialCoalescence_E<<<_grid, _block, 0, stream>>>(_data_buffer, _data_mask, _data_pdfs_src, _size_mask_0, _size_mask_1, _size_mask_2, _stride_mask_0, _stride_mask_1, _stride_mask_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::T : { // Arm for stencil::T: derive the launch geometry from the mask extents, then start the T-specific kernel on 'stream'.
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_mask_1) ? 16 : _size_mask_1)), uint32_c(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))))); // threads per block: x = 1, y = min(16, _size_mask_1), z = min(64, _size_mask_2, 16 * (16 / y)) with integer division. NOTE(review): y is a divisor here, so this assumes _size_mask_1 > 0 - confirm against the caller.
+              dim3 _grid(uint32_c(( (_size_mask_0) % (1) == 0 ? (int64_t)(_size_mask_0) / (int64_t)(1) : ( (int64_t)(_size_mask_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_mask_1) % (((16 < _size_mask_1) ? 16 : _size_mask_1)) == 0 ? (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) : ( (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) ) +1 )), uint32_c(( (_size_mask_2) % (((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) == 0 ? (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) : ( (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) ) +1 ))); // blocks per axis: _size_mask_i divided by the matching _block extent, rounded up (quotient, plus one when the modulo is non-zero); the x term reduces to _size_mask_0 because its divisor is 1.
+              internal_storagespecification_packPartialCoalescence_T::storagespecification_packPartialCoalescence_T<<<_grid, _block, 0, stream>>>(_data_buffer, _data_mask, _data_pdfs_src, _size_mask_0, _size_mask_1, _size_mask_2, _stride_mask_0, _stride_mask_1, _stride_mask_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3); // execution configuration: 0 bytes of dynamic shared memory, enqueued on 'stream'; the kernel body is defined elsewhere in this file.
+              break;
+          }
+          case stencil::B : { // Arm for stencil::B: derive the launch geometry from the mask extents, then start the B-specific kernel on 'stream'.
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_mask_1) ? 16 : _size_mask_1)), uint32_c(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))))); // threads per block: x = 1, y = min(16, _size_mask_1), z = min(64, _size_mask_2, 16 * (16 / y)) with integer division. NOTE(review): y is a divisor here, so this assumes _size_mask_1 > 0 - confirm against the caller.
+              dim3 _grid(uint32_c(( (_size_mask_0) % (1) == 0 ? (int64_t)(_size_mask_0) / (int64_t)(1) : ( (int64_t)(_size_mask_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_mask_1) % (((16 < _size_mask_1) ? 16 : _size_mask_1)) == 0 ? (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) : ( (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) ) +1 )), uint32_c(( (_size_mask_2) % (((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) == 0 ? (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) : ( (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) ) +1 ))); // blocks per axis: _size_mask_i divided by the matching _block extent, rounded up (quotient, plus one when the modulo is non-zero); the x term reduces to _size_mask_0 because its divisor is 1.
+              internal_storagespecification_packPartialCoalescence_B::storagespecification_packPartialCoalescence_B<<<_grid, _block, 0, stream>>>(_data_buffer, _data_mask, _data_pdfs_src, _size_mask_0, _size_mask_1, _size_mask_2, _stride_mask_0, _stride_mask_1, _stride_mask_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3); // execution configuration: 0 bytes of dynamic shared memory, enqueued on 'stream'; the kernel body is defined elsewhere in this file.
+              break;
+          }
+          case stencil::NW : { // Arm for stencil::NW: derive the launch geometry from the mask extents, then start the NW-specific kernel on 'stream'.
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_mask_1) ? 16 : _size_mask_1)), uint32_c(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))))); // threads per block: x = 1, y = min(16, _size_mask_1), z = min(64, _size_mask_2, 16 * (16 / y)) with integer division. NOTE(review): y is a divisor here, so this assumes _size_mask_1 > 0 - confirm against the caller.
+              dim3 _grid(uint32_c(( (_size_mask_0) % (1) == 0 ? (int64_t)(_size_mask_0) / (int64_t)(1) : ( (int64_t)(_size_mask_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_mask_1) % (((16 < _size_mask_1) ? 16 : _size_mask_1)) == 0 ? (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) : ( (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) ) +1 )), uint32_c(( (_size_mask_2) % (((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) == 0 ? (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) : ( (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) ) +1 ))); // blocks per axis: _size_mask_i divided by the matching _block extent, rounded up (quotient, plus one when the modulo is non-zero); the x term reduces to _size_mask_0 because its divisor is 1.
+              internal_storagespecification_packPartialCoalescence_NW::storagespecification_packPartialCoalescence_NW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_mask, _data_pdfs_src, _size_mask_0, _size_mask_1, _size_mask_2, _stride_mask_0, _stride_mask_1, _stride_mask_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3); // execution configuration: 0 bytes of dynamic shared memory, enqueued on 'stream'; the kernel body is defined elsewhere in this file.
+              break;
+          }
+          case stencil::NE : { // Arm for stencil::NE: derive the launch geometry from the mask extents, then start the NE-specific kernel on 'stream'.
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_mask_1) ? 16 : _size_mask_1)), uint32_c(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))))); // threads per block: x = 1, y = min(16, _size_mask_1), z = min(64, _size_mask_2, 16 * (16 / y)) with integer division. NOTE(review): y is a divisor here, so this assumes _size_mask_1 > 0 - confirm against the caller.
+              dim3 _grid(uint32_c(( (_size_mask_0) % (1) == 0 ? (int64_t)(_size_mask_0) / (int64_t)(1) : ( (int64_t)(_size_mask_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_mask_1) % (((16 < _size_mask_1) ? 16 : _size_mask_1)) == 0 ? (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) : ( (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) ) +1 )), uint32_c(( (_size_mask_2) % (((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) == 0 ? (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) : ( (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) ) +1 ))); // blocks per axis: _size_mask_i divided by the matching _block extent, rounded up (quotient, plus one when the modulo is non-zero); the x term reduces to _size_mask_0 because its divisor is 1.
+              internal_storagespecification_packPartialCoalescence_NE::storagespecification_packPartialCoalescence_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_mask, _data_pdfs_src, _size_mask_0, _size_mask_1, _size_mask_2, _stride_mask_0, _stride_mask_1, _stride_mask_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3); // execution configuration: 0 bytes of dynamic shared memory, enqueued on 'stream'; the kernel body is defined elsewhere in this file.
+              break;
+          }
+          case stencil::SW : { // Arm for stencil::SW: derive the launch geometry from the mask extents, then start the SW-specific kernel on 'stream'.
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_mask_1) ? 16 : _size_mask_1)), uint32_c(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))))); // threads per block: x = 1, y = min(16, _size_mask_1), z = min(64, _size_mask_2, 16 * (16 / y)) with integer division. NOTE(review): y is a divisor here, so this assumes _size_mask_1 > 0 - confirm against the caller.
+              dim3 _grid(uint32_c(( (_size_mask_0) % (1) == 0 ? (int64_t)(_size_mask_0) / (int64_t)(1) : ( (int64_t)(_size_mask_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_mask_1) % (((16 < _size_mask_1) ? 16 : _size_mask_1)) == 0 ? (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) : ( (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) ) +1 )), uint32_c(( (_size_mask_2) % (((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) == 0 ? (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) : ( (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) ) +1 ))); // blocks per axis: _size_mask_i divided by the matching _block extent, rounded up (quotient, plus one when the modulo is non-zero); the x term reduces to _size_mask_0 because its divisor is 1.
+              internal_storagespecification_packPartialCoalescence_SW::storagespecification_packPartialCoalescence_SW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_mask, _data_pdfs_src, _size_mask_0, _size_mask_1, _size_mask_2, _stride_mask_0, _stride_mask_1, _stride_mask_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3); // execution configuration: 0 bytes of dynamic shared memory, enqueued on 'stream'; the kernel body is defined elsewhere in this file.
+              break;
+          }
+          case stencil::SE : { // Arm for stencil::SE: derive the launch geometry from the mask extents, then start the SE-specific kernel on 'stream'.
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_mask_1) ? 16 : _size_mask_1)), uint32_c(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))))); // threads per block: x = 1, y = min(16, _size_mask_1), z = min(64, _size_mask_2, 16 * (16 / y)) with integer division. NOTE(review): y is a divisor here, so this assumes _size_mask_1 > 0 - confirm against the caller.
+              dim3 _grid(uint32_c(( (_size_mask_0) % (1) == 0 ? (int64_t)(_size_mask_0) / (int64_t)(1) : ( (int64_t)(_size_mask_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_mask_1) % (((16 < _size_mask_1) ? 16 : _size_mask_1)) == 0 ? (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) : ( (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) ) +1 )), uint32_c(( (_size_mask_2) % (((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) == 0 ? (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) : ( (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) ) +1 ))); // blocks per axis: _size_mask_i divided by the matching _block extent, rounded up (quotient, plus one when the modulo is non-zero); the x term reduces to _size_mask_0 because its divisor is 1.
+              internal_storagespecification_packPartialCoalescence_SE::storagespecification_packPartialCoalescence_SE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_mask, _data_pdfs_src, _size_mask_0, _size_mask_1, _size_mask_2, _stride_mask_0, _stride_mask_1, _stride_mask_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3); // execution configuration: 0 bytes of dynamic shared memory, enqueued on 'stream'; the kernel body is defined elsewhere in this file.
+              break;
+          }
+          case stencil::TN : { // Arm for stencil::TN: derive the launch geometry from the mask extents, then start the TN-specific kernel on 'stream'.
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_mask_1) ? 16 : _size_mask_1)), uint32_c(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))))); // threads per block: x = 1, y = min(16, _size_mask_1), z = min(64, _size_mask_2, 16 * (16 / y)) with integer division. NOTE(review): y is a divisor here, so this assumes _size_mask_1 > 0 - confirm against the caller.
+              dim3 _grid(uint32_c(( (_size_mask_0) % (1) == 0 ? (int64_t)(_size_mask_0) / (int64_t)(1) : ( (int64_t)(_size_mask_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_mask_1) % (((16 < _size_mask_1) ? 16 : _size_mask_1)) == 0 ? (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) : ( (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) ) +1 )), uint32_c(( (_size_mask_2) % (((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) == 0 ? (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) : ( (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) ) +1 ))); // blocks per axis: _size_mask_i divided by the matching _block extent, rounded up (quotient, plus one when the modulo is non-zero); the x term reduces to _size_mask_0 because its divisor is 1.
+              internal_storagespecification_packPartialCoalescence_TN::storagespecification_packPartialCoalescence_TN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_mask, _data_pdfs_src, _size_mask_0, _size_mask_1, _size_mask_2, _stride_mask_0, _stride_mask_1, _stride_mask_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3); // execution configuration: 0 bytes of dynamic shared memory, enqueued on 'stream'; the kernel body is defined elsewhere in this file.
+              break;
+          }
+          case stencil::TS : { // Arm for stencil::TS: derive the launch geometry from the mask extents, then start the TS-specific kernel on 'stream'.
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_mask_1) ? 16 : _size_mask_1)), uint32_c(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))))); // threads per block: x = 1, y = min(16, _size_mask_1), z = min(64, _size_mask_2, 16 * (16 / y)) with integer division. NOTE(review): y is a divisor here, so this assumes _size_mask_1 > 0 - confirm against the caller.
+              dim3 _grid(uint32_c(( (_size_mask_0) % (1) == 0 ? (int64_t)(_size_mask_0) / (int64_t)(1) : ( (int64_t)(_size_mask_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_mask_1) % (((16 < _size_mask_1) ? 16 : _size_mask_1)) == 0 ? (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) : ( (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) ) +1 )), uint32_c(( (_size_mask_2) % (((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) == 0 ? (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) : ( (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) ) +1 ))); // blocks per axis: _size_mask_i divided by the matching _block extent, rounded up (quotient, plus one when the modulo is non-zero); the x term reduces to _size_mask_0 because its divisor is 1.
+              internal_storagespecification_packPartialCoalescence_TS::storagespecification_packPartialCoalescence_TS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_mask, _data_pdfs_src, _size_mask_0, _size_mask_1, _size_mask_2, _stride_mask_0, _stride_mask_1, _stride_mask_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3); // execution configuration: 0 bytes of dynamic shared memory, enqueued on 'stream'; the kernel body is defined elsewhere in this file.
+              break;
+          }
+          case stencil::TW : { // Arm for stencil::TW: derive the launch geometry from the mask extents, then start the TW-specific kernel on 'stream'.
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_mask_1) ? 16 : _size_mask_1)), uint32_c(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))))); // threads per block: x = 1, y = min(16, _size_mask_1), z = min(64, _size_mask_2, 16 * (16 / y)) with integer division. NOTE(review): y is a divisor here, so this assumes _size_mask_1 > 0 - confirm against the caller.
+              dim3 _grid(uint32_c(( (_size_mask_0) % (1) == 0 ? (int64_t)(_size_mask_0) / (int64_t)(1) : ( (int64_t)(_size_mask_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_mask_1) % (((16 < _size_mask_1) ? 16 : _size_mask_1)) == 0 ? (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) : ( (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) ) +1 )), uint32_c(( (_size_mask_2) % (((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) == 0 ? (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) : ( (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) ) +1 ))); // blocks per axis: _size_mask_i divided by the matching _block extent, rounded up (quotient, plus one when the modulo is non-zero); the x term reduces to _size_mask_0 because its divisor is 1.
+              internal_storagespecification_packPartialCoalescence_TW::storagespecification_packPartialCoalescence_TW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_mask, _data_pdfs_src, _size_mask_0, _size_mask_1, _size_mask_2, _stride_mask_0, _stride_mask_1, _stride_mask_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3); // execution configuration: 0 bytes of dynamic shared memory, enqueued on 'stream'; the kernel body is defined elsewhere in this file.
+              break;
+          }
+          case stencil::TE : { // Arm for stencil::TE: derive the launch geometry from the mask extents, then start the TE-specific kernel on 'stream'.
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_mask_1) ? 16 : _size_mask_1)), uint32_c(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))))); // threads per block: x = 1, y = min(16, _size_mask_1), z = min(64, _size_mask_2, 16 * (16 / y)) with integer division. NOTE(review): y is a divisor here, so this assumes _size_mask_1 > 0 - confirm against the caller.
+              dim3 _grid(uint32_c(( (_size_mask_0) % (1) == 0 ? (int64_t)(_size_mask_0) / (int64_t)(1) : ( (int64_t)(_size_mask_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_mask_1) % (((16 < _size_mask_1) ? 16 : _size_mask_1)) == 0 ? (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) : ( (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) ) +1 )), uint32_c(( (_size_mask_2) % (((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) == 0 ? (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) : ( (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) ) +1 ))); // blocks per axis: _size_mask_i divided by the matching _block extent, rounded up (quotient, plus one when the modulo is non-zero); the x term reduces to _size_mask_0 because its divisor is 1.
+              internal_storagespecification_packPartialCoalescence_TE::storagespecification_packPartialCoalescence_TE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_mask, _data_pdfs_src, _size_mask_0, _size_mask_1, _size_mask_2, _stride_mask_0, _stride_mask_1, _stride_mask_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3); // execution configuration: 0 bytes of dynamic shared memory, enqueued on 'stream'; the kernel body is defined elsewhere in this file.
+              break;
+          }
+          case stencil::BN : { // Arm for stencil::BN: derive the launch geometry from the mask extents, then start the BN-specific kernel on 'stream'.
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_mask_1) ? 16 : _size_mask_1)), uint32_c(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))))); // threads per block: x = 1, y = min(16, _size_mask_1), z = min(64, _size_mask_2, 16 * (16 / y)) with integer division. NOTE(review): y is a divisor here, so this assumes _size_mask_1 > 0 - confirm against the caller.
+              dim3 _grid(uint32_c(( (_size_mask_0) % (1) == 0 ? (int64_t)(_size_mask_0) / (int64_t)(1) : ( (int64_t)(_size_mask_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_mask_1) % (((16 < _size_mask_1) ? 16 : _size_mask_1)) == 0 ? (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) : ( (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) ) +1 )), uint32_c(( (_size_mask_2) % (((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) == 0 ? (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) : ( (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) ) +1 ))); // blocks per axis: _size_mask_i divided by the matching _block extent, rounded up (quotient, plus one when the modulo is non-zero); the x term reduces to _size_mask_0 because its divisor is 1.
+              internal_storagespecification_packPartialCoalescence_BN::storagespecification_packPartialCoalescence_BN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_mask, _data_pdfs_src, _size_mask_0, _size_mask_1, _size_mask_2, _stride_mask_0, _stride_mask_1, _stride_mask_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3); // execution configuration: 0 bytes of dynamic shared memory, enqueued on 'stream'; the kernel body is defined elsewhere in this file.
+              break;
+          }
+          case stencil::BS : { // Arm for stencil::BS: derive the launch geometry from the mask extents, then start the BS-specific kernel on 'stream'.
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_mask_1) ? 16 : _size_mask_1)), uint32_c(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))))); // threads per block: x = 1, y = min(16, _size_mask_1), z = min(64, _size_mask_2, 16 * (16 / y)) with integer division. NOTE(review): y is a divisor here, so this assumes _size_mask_1 > 0 - confirm against the caller.
+              dim3 _grid(uint32_c(( (_size_mask_0) % (1) == 0 ? (int64_t)(_size_mask_0) / (int64_t)(1) : ( (int64_t)(_size_mask_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_mask_1) % (((16 < _size_mask_1) ? 16 : _size_mask_1)) == 0 ? (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) : ( (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) ) +1 )), uint32_c(( (_size_mask_2) % (((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) == 0 ? (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) : ( (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) ) +1 ))); // blocks per axis: _size_mask_i divided by the matching _block extent, rounded up (quotient, plus one when the modulo is non-zero); the x term reduces to _size_mask_0 because its divisor is 1.
+              internal_storagespecification_packPartialCoalescence_BS::storagespecification_packPartialCoalescence_BS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_mask, _data_pdfs_src, _size_mask_0, _size_mask_1, _size_mask_2, _stride_mask_0, _stride_mask_1, _stride_mask_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3); // execution configuration: 0 bytes of dynamic shared memory, enqueued on 'stream'; the kernel body is defined elsewhere in this file.
+              break;
+          }
+          case stencil::BW : { // Arm for stencil::BW: derive the launch geometry from the mask extents, then start the BW-specific kernel on 'stream'.
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_mask_1) ? 16 : _size_mask_1)), uint32_c(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))))); // threads per block: x = 1, y = min(16, _size_mask_1), z = min(64, _size_mask_2, 16 * (16 / y)) with integer division. NOTE(review): y is a divisor here, so this assumes _size_mask_1 > 0 - confirm against the caller.
+              dim3 _grid(uint32_c(( (_size_mask_0) % (1) == 0 ? (int64_t)(_size_mask_0) / (int64_t)(1) : ( (int64_t)(_size_mask_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_mask_1) % (((16 < _size_mask_1) ? 16 : _size_mask_1)) == 0 ? (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) : ( (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) ) +1 )), uint32_c(( (_size_mask_2) % (((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) == 0 ? (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) : ( (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) ) +1 ))); // blocks per axis: _size_mask_i divided by the matching _block extent, rounded up (quotient, plus one when the modulo is non-zero); the x term reduces to _size_mask_0 because its divisor is 1.
+              internal_storagespecification_packPartialCoalescence_BW::storagespecification_packPartialCoalescence_BW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_mask, _data_pdfs_src, _size_mask_0, _size_mask_1, _size_mask_2, _stride_mask_0, _stride_mask_1, _stride_mask_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3); // execution configuration: 0 bytes of dynamic shared memory, enqueued on 'stream'; the kernel body is defined elsewhere in this file.
+              break;
+          }
+          case stencil::BE : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_mask_1) ? 16 : _size_mask_1)), uint32_c(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))));
+              dim3 _grid(uint32_c(( (_size_mask_0) % (1) == 0 ? (int64_t)(_size_mask_0) / (int64_t)(1) : ( (int64_t)(_size_mask_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_mask_1) % (((16 < _size_mask_1) ? 16 : _size_mask_1)) == 0 ? (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) : ( (int64_t)(_size_mask_1) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)) ) +1 )), uint32_c(( (_size_mask_2) % (((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) == 0 ? (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) : ( (int64_t)(_size_mask_2) / (int64_t)(((64 < ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1))))) ? 64 : ((_size_mask_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))) ? _size_mask_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_mask_1) ? 16 : _size_mask_1)))))) ) +1 )));
+              internal_storagespecification_packPartialCoalescence_BE::storagespecification_packPartialCoalescence_BE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_mask, _data_pdfs_src, _size_mask_0, _size_mask_1, _size_mask_2, _stride_mask_0, _stride_mask_1, _stride_mask_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }default: break; 
+      }
+   }
+
+   void StorageSpecification::PackKernels::zeroCoalescenceRegion(PdfField_T * pdfs_dst, CellInterval & ci, stencil::Direction dir, gpuStream_t stream) const
+   {
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      double * RESTRICT  _data_pdfs_dst = pdfs_dst->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+      const int64_t _size_pdfs_dst_0 = int64_t(int64_c(ci.xSize()) + 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+      const int64_t _size_pdfs_dst_1 = int64_t(int64_c(ci.ySize()) + 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+      const int64_t _size_pdfs_dst_2 = int64_t(int64_c(ci.zSize()) + 0);
+      const int64_t _stride_pdfs_dst_0 = int64_t(pdfs_dst->xStride());
+      const int64_t _stride_pdfs_dst_1 = int64_t(pdfs_dst->yStride());
+      const int64_t _stride_pdfs_dst_2 = int64_t(pdfs_dst->zStride());
+      const int64_t _stride_pdfs_dst_3 = int64_t(1 * int64_t(pdfs_dst->fStride()));
+      switch (dir) {
+          case stencil::N : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_zeroCoalescenceRegion_N::storagespecification_zeroCoalescenceRegion_N<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::S : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_zeroCoalescenceRegion_S::storagespecification_zeroCoalescenceRegion_S<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::W : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_zeroCoalescenceRegion_W::storagespecification_zeroCoalescenceRegion_W<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::E : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_zeroCoalescenceRegion_E::storagespecification_zeroCoalescenceRegion_E<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::T : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_zeroCoalescenceRegion_T::storagespecification_zeroCoalescenceRegion_T<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::B : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_zeroCoalescenceRegion_B::storagespecification_zeroCoalescenceRegion_B<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::NW : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_zeroCoalescenceRegion_NW::storagespecification_zeroCoalescenceRegion_NW<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::NE : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_zeroCoalescenceRegion_NE::storagespecification_zeroCoalescenceRegion_NE<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::SW : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_zeroCoalescenceRegion_SW::storagespecification_zeroCoalescenceRegion_SW<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::SE : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_zeroCoalescenceRegion_SE::storagespecification_zeroCoalescenceRegion_SE<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::TN : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_zeroCoalescenceRegion_TN::storagespecification_zeroCoalescenceRegion_TN<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::TS : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_zeroCoalescenceRegion_TS::storagespecification_zeroCoalescenceRegion_TS<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::TW : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_zeroCoalescenceRegion_TW::storagespecification_zeroCoalescenceRegion_TW<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::TE : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_zeroCoalescenceRegion_TE::storagespecification_zeroCoalescenceRegion_TE<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::BN : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_zeroCoalescenceRegion_BN::storagespecification_zeroCoalescenceRegion_BN<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::BS : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_zeroCoalescenceRegion_BS::storagespecification_zeroCoalescenceRegion_BS<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::BW : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_zeroCoalescenceRegion_BW::storagespecification_zeroCoalescenceRegion_BW<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::BE : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_zeroCoalescenceRegion_BE::storagespecification_zeroCoalescenceRegion_BE<<<_grid, _block, 0, stream>>>(_data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }default: break; 
+      }
+   }
+
+   void StorageSpecification::PackKernels::unpackCoalescence(PdfField_T * pdfs_dst, CellInterval & ci, unsigned char * inBuffer, stencil::Direction dir, gpuStream_t stream) const
+   {
+      double * buffer = reinterpret_cast<double*>(inBuffer);
+      double * RESTRICT const _data_buffer = buffer;
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      double * RESTRICT  _data_pdfs_dst = pdfs_dst->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+      const int64_t _size_pdfs_dst_0 = int64_t(int64_c(ci.xSize()) + 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+      const int64_t _size_pdfs_dst_1 = int64_t(int64_c(ci.ySize()) + 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+      const int64_t _size_pdfs_dst_2 = int64_t(int64_c(ci.zSize()) + 0);
+      const int64_t _stride_pdfs_dst_0 = int64_t(pdfs_dst->xStride());
+      const int64_t _stride_pdfs_dst_1 = int64_t(pdfs_dst->yStride());
+      const int64_t _stride_pdfs_dst_2 = int64_t(pdfs_dst->zStride());
+      const int64_t _stride_pdfs_dst_3 = int64_t(1 * int64_t(pdfs_dst->fStride()));
+      switch (dir) {
+          case stencil::N : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpackCoalescence_N::storagespecification_unpackCoalescence_N<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::S : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpackCoalescence_S::storagespecification_unpackCoalescence_S<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::W : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpackCoalescence_W::storagespecification_unpackCoalescence_W<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::E : { // E direction: derive the launch configuration from the pdfs_dst extents, then launch the E unpackCoalescence kernel.
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))))); // Threads per block: x = 1, y = min(16, size_1), z = min(64, size_2, 16*(16/y)) with integer division. NOTE(review): a zero size_1 or size_2 gives a zero block extent and a division/modulo by zero here or in _grid -- presumably extents are always positive; confirm against the caller.
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 ))); // Blocks per grid: each extent divided by the matching block extent, rounded up; x reduces to size_0 because the block is one thread wide.
+              internal_storagespecification_unpackCoalescence_E::storagespecification_unpackCoalescence_E<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3); // Execution configuration: _grid blocks of _block threads, 0 bytes of dynamic shared memory, queued on `stream`.
+              break;
+          }
+          case stencil::T : { // T direction: derive the launch configuration from the pdfs_dst extents, then launch the T unpackCoalescence kernel.
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))))); // Threads per block: x = 1, y = min(16, size_1), z = min(64, size_2, 16*(16/y)) with integer division. NOTE(review): a zero size_1 or size_2 gives a zero block extent and a division/modulo by zero here or in _grid -- presumably extents are always positive; confirm against the caller.
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 ))); // Blocks per grid: each extent divided by the matching block extent, rounded up; x reduces to size_0 because the block is one thread wide.
+              internal_storagespecification_unpackCoalescence_T::storagespecification_unpackCoalescence_T<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3); // Execution configuration: _grid blocks of _block threads, 0 bytes of dynamic shared memory, queued on `stream`.
+              break;
+          }
+          case stencil::B : { // B direction: derive the launch configuration from the pdfs_dst extents, then launch the B unpackCoalescence kernel.
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))))); // Threads per block: x = 1, y = min(16, size_1), z = min(64, size_2, 16*(16/y)) with integer division. NOTE(review): a zero size_1 or size_2 gives a zero block extent and a division/modulo by zero here or in _grid -- presumably extents are always positive; confirm against the caller.
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 ))); // Blocks per grid: each extent divided by the matching block extent, rounded up; x reduces to size_0 because the block is one thread wide.
+              internal_storagespecification_unpackCoalescence_B::storagespecification_unpackCoalescence_B<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3); // Execution configuration: _grid blocks of _block threads, 0 bytes of dynamic shared memory, queued on `stream`.
+              break;
+          }
+          case stencil::NW : { // NW direction: derive the launch configuration from the pdfs_dst extents, then launch the NW unpackCoalescence kernel.
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))))); // Threads per block: x = 1, y = min(16, size_1), z = min(64, size_2, 16*(16/y)) with integer division. NOTE(review): a zero size_1 or size_2 gives a zero block extent and a division/modulo by zero here or in _grid -- presumably extents are always positive; confirm against the caller.
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 ))); // Blocks per grid: each extent divided by the matching block extent, rounded up; x reduces to size_0 because the block is one thread wide.
+              internal_storagespecification_unpackCoalescence_NW::storagespecification_unpackCoalescence_NW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3); // Execution configuration: _grid blocks of _block threads, 0 bytes of dynamic shared memory, queued on `stream`.
+              break;
+          }
+          case stencil::NE : { // NE direction: derive the launch configuration from the pdfs_dst extents, then launch the NE unpackCoalescence kernel.
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))))); // Threads per block: x = 1, y = min(16, size_1), z = min(64, size_2, 16*(16/y)) with integer division. NOTE(review): a zero size_1 or size_2 gives a zero block extent and a division/modulo by zero here or in _grid -- presumably extents are always positive; confirm against the caller.
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 ))); // Blocks per grid: each extent divided by the matching block extent, rounded up; x reduces to size_0 because the block is one thread wide.
+              internal_storagespecification_unpackCoalescence_NE::storagespecification_unpackCoalescence_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3); // Execution configuration: _grid blocks of _block threads, 0 bytes of dynamic shared memory, queued on `stream`.
+              break;
+          }
+          case stencil::SW : { // SW direction: derive the launch configuration from the pdfs_dst extents, then launch the SW unpackCoalescence kernel.
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))))); // Threads per block: x = 1, y = min(16, size_1), z = min(64, size_2, 16*(16/y)) with integer division. NOTE(review): a zero size_1 or size_2 gives a zero block extent and a division/modulo by zero here or in _grid -- presumably extents are always positive; confirm against the caller.
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 ))); // Blocks per grid: each extent divided by the matching block extent, rounded up; x reduces to size_0 because the block is one thread wide.
+              internal_storagespecification_unpackCoalescence_SW::storagespecification_unpackCoalescence_SW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3); // Execution configuration: _grid blocks of _block threads, 0 bytes of dynamic shared memory, queued on `stream`.
+              break;
+          }
+          case stencil::SE : { // SE direction: derive the launch configuration from the pdfs_dst extents, then launch the SE unpackCoalescence kernel.
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))))); // Threads per block: x = 1, y = min(16, size_1), z = min(64, size_2, 16*(16/y)) with integer division. NOTE(review): a zero size_1 or size_2 gives a zero block extent and a division/modulo by zero here or in _grid -- presumably extents are always positive; confirm against the caller.
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 ))); // Blocks per grid: each extent divided by the matching block extent, rounded up; x reduces to size_0 because the block is one thread wide.
+              internal_storagespecification_unpackCoalescence_SE::storagespecification_unpackCoalescence_SE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3); // Execution configuration: _grid blocks of _block threads, 0 bytes of dynamic shared memory, queued on `stream`.
+              break;
+          }
+          case stencil::TN : { // TN direction: derive the launch configuration from the pdfs_dst extents, then launch the TN unpackCoalescence kernel.
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))))); // Threads per block: x = 1, y = min(16, size_1), z = min(64, size_2, 16*(16/y)) with integer division. NOTE(review): a zero size_1 or size_2 gives a zero block extent and a division/modulo by zero here or in _grid -- presumably extents are always positive; confirm against the caller.
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 ))); // Blocks per grid: each extent divided by the matching block extent, rounded up; x reduces to size_0 because the block is one thread wide.
+              internal_storagespecification_unpackCoalescence_TN::storagespecification_unpackCoalescence_TN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3); // Execution configuration: _grid blocks of _block threads, 0 bytes of dynamic shared memory, queued on `stream`.
+              break;
+          }
+          case stencil::TS : { // TS direction: derive the launch configuration from the pdfs_dst extents, then launch the TS unpackCoalescence kernel.
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))))); // Threads per block: x = 1, y = min(16, size_1), z = min(64, size_2, 16*(16/y)) with integer division. NOTE(review): a zero size_1 or size_2 gives a zero block extent and a division/modulo by zero here or in _grid -- presumably extents are always positive; confirm against the caller.
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 ))); // Blocks per grid: each extent divided by the matching block extent, rounded up; x reduces to size_0 because the block is one thread wide.
+              internal_storagespecification_unpackCoalescence_TS::storagespecification_unpackCoalescence_TS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3); // Execution configuration: _grid blocks of _block threads, 0 bytes of dynamic shared memory, queued on `stream`.
+              break;
+          }
+          case stencil::TW : { // TW direction: derive the launch configuration from the pdfs_dst extents, then launch the TW unpackCoalescence kernel.
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))))); // Threads per block: x = 1, y = min(16, size_1), z = min(64, size_2, 16*(16/y)) with integer division. NOTE(review): a zero size_1 or size_2 gives a zero block extent and a division/modulo by zero here or in _grid -- presumably extents are always positive; confirm against the caller.
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 ))); // Blocks per grid: each extent divided by the matching block extent, rounded up; x reduces to size_0 because the block is one thread wide.
+              internal_storagespecification_unpackCoalescence_TW::storagespecification_unpackCoalescence_TW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3); // Execution configuration: _grid blocks of _block threads, 0 bytes of dynamic shared memory, queued on `stream`.
+              break;
+          }
+          case stencil::TE : { // TE direction: derive the launch configuration from the pdfs_dst extents, then launch the TE unpackCoalescence kernel.
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))))); // Threads per block: x = 1, y = min(16, size_1), z = min(64, size_2, 16*(16/y)) with integer division. NOTE(review): a zero size_1 or size_2 gives a zero block extent and a division/modulo by zero here or in _grid -- presumably extents are always positive; confirm against the caller.
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 ))); // Blocks per grid: each extent divided by the matching block extent, rounded up; x reduces to size_0 because the block is one thread wide.
+              internal_storagespecification_unpackCoalescence_TE::storagespecification_unpackCoalescence_TE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3); // Execution configuration: _grid blocks of _block threads, 0 bytes of dynamic shared memory, queued on `stream`.
+              break;
+          }
+          case stencil::BN : { // BN direction: derive the launch configuration from the pdfs_dst extents, then launch the BN unpackCoalescence kernel.
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))))); // Threads per block: x = 1, y = min(16, size_1), z = min(64, size_2, 16*(16/y)) with integer division. NOTE(review): a zero size_1 or size_2 gives a zero block extent and a division/modulo by zero here or in _grid -- presumably extents are always positive; confirm against the caller.
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 ))); // Blocks per grid: each extent divided by the matching block extent, rounded up; x reduces to size_0 because the block is one thread wide.
+              internal_storagespecification_unpackCoalescence_BN::storagespecification_unpackCoalescence_BN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3); // Execution configuration: _grid blocks of _block threads, 0 bytes of dynamic shared memory, queued on `stream`.
+              break;
+          }
+          case stencil::BS : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpackCoalescence_BS::storagespecification_unpackCoalescence_BS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::BW : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpackCoalescence_BW::storagespecification_unpackCoalescence_BW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::BE : {
+              dim3 _block(uint32_c(1), uint32_c(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)), uint32_c(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))));
+              dim3 _grid(uint32_c(( (_size_pdfs_dst_0) % (1) == 0 ? (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) : ( (int64_t)(_size_pdfs_dst_0) / (int64_t)(1) ) +1 )), uint32_c(( (_size_pdfs_dst_1) % (((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) == 0 ? (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) : ( (int64_t)(_size_pdfs_dst_1) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)) ) +1 )), uint32_c(( (_size_pdfs_dst_2) % (((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) == 0 ? (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) : ( (int64_t)(_size_pdfs_dst_2) / (int64_t)(((64 < ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1))))) ? 64 : ((_size_pdfs_dst_2 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))) ? _size_pdfs_dst_2 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_dst_1) ? 16 : _size_pdfs_dst_1)))))) ) +1 )));
+              internal_storagespecification_unpackCoalescence_BE::storagespecification_unpackCoalescence_BE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }default: break; 
+      }
+   }
+}  // namespace lbm
+}  // namespace walberla
\ No newline at end of file
diff --git a/apps/benchmarks/GridRefSphere/StorageSpecification.h b/apps/benchmarks/GridRefSphere/StorageSpecification.h
new file mode 100644
index 000000000..50291632e
--- /dev/null
+++ b/apps/benchmarks/GridRefSphere/StorageSpecification.h
@@ -0,0 +1,190 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file StorageSpecification.h
+//! \author lbmpy
+//======================================================================================================================
+
+#pragma once
+
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+#include "core/mpi/SendBuffer.h"
+#include "core/mpi/RecvBuffer.h"
+
+#include "domain_decomposition/IBlock.h"
+#include "field/GhostLayerField.h"
+
+#include "stencil/D3Q19.h"
+#include "stencil/Directions.h"
+
+#define FUNC_PREFIX __global__
+#include "gpu/GPUWrapper.h"
+#include "gpu/GPUField.h"
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if defined WALBERLA_CXX_COMPILER_IS_GNU || defined WALBERLA_CXX_COMPILER_IS_CLANG
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#endif // NOTE(review): no matching "#pragma GCC diagnostic pop" appears later in this header - confirm the suppression is meant to reach includers
+
+namespace walberla
+{
+namespace lbm{
+
+class StorageSpecification
+{
+ public:
+   // Lattice stencil of the method (D3Q19)
+   using Stencil = stencil::D3Q19;
+   // Lattice stencil used for the communication (should be used to define which block directions need to be communicated)
+   using CommunicationStencil = stencil::D3Q19;
+   // If false, the incompressible correction is used: Lattice Boltzmann Model for the Incompressible Navier–Stokes Equation, He 1997
+   static const bool compressible = false;
+   // Cut-off order for the lattice Boltzmann equilibrium
+   static const int equilibriumAccuracyOrder = 2;
+   // If true, the equilibrium is computed with regard to "delta_rho" and not the actual density "rho"
+   static const bool equilibriumDeviationOnly = false;
+   // True if the streaming pattern is in-place (esotwist, aa, ...), false otherwise (pull, push)
+   static const bool inplace = false;
+   // If true, the background deviation (rho_0 = 1) is subtracted for the collision step.
+   static const bool zeroCenteredPDFs = false;
+   // Lattice weights (19 entries)
+   static constexpr double w[19] = { 0.333333333333333,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778 };
+   // Inverse lattice weights (wInv[i] = 1 / w[i])
+   static constexpr double wInv[19] = { 3.00000000000000,18.0000000000000,18.0000000000000,18.0000000000000,18.0000000000000,18.0000000000000,18.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000 };
+
+   // Compute kernels to pack and unpack MPI buffers
+   class PackKernels {
+
+    public:
+      using PdfField_T = gpu::GPUField<double>;
+      using value_type = typename PdfField_T::value_type;
+
+      using MaskField_T = gpu::GPUField< uint32_t >;
+
+      static const bool inplace = false;
+
+      /**
+       * Packs all pdfs from the given cell interval to the send buffer.
+       * */
+      void packAll(PdfField_T * pdfs_src, CellInterval & ci, unsigned char * outBuffer, gpuStream_t stream = nullptr) const;
+
+      /**
+       * Unpacks all pdfs from the given buffer (inBuffer) to the given cell interval.
+       * */
+      void unpackAll(PdfField_T * pdfs_dst, CellInterval & ci, unsigned char * inBuffer, gpuStream_t stream = nullptr) const;
+
+      /**
+       * Copies data between two blocks on the same process.
+       * All pdfs from the sending interval are copied onto the receiving interval.
+       * */
+      void localCopyAll(PdfField_T * pdfs_src, CellInterval & srcInterval, PdfField_T * pdfs_dst, CellInterval & dstInterval, gpuStream_t stream = nullptr) const;
+
+      /**
+       * Packs only those populations streaming in directions aligned with the sending direction dir from the given cell interval.
+       * For example, in 2D, if dir == N, the pdfs streaming in directions NW, N, NE are packed.
+       * */
+      void packDirection(PdfField_T * pdfs_src, CellInterval & ci, unsigned char * outBuffer, stencil::Direction dir, gpuStream_t stream = nullptr) const;
+
+      /**
+       * Unpacks only those populations streaming in directions aligned with the sending direction dir to the given cell interval.
+       * For example, in 2D, if dir == N, the pdfs streaming in directions NW, N, NE are unpacked.
+       * */
+      void unpackDirection(PdfField_T * pdfs_dst, CellInterval & ci, unsigned char * inBuffer, stencil::Direction dir, gpuStream_t stream = nullptr) const;
+
+      /** Copies data between two blocks on the same process.
+        * PDFs streaming aligned with the direction dir are copied from the sending interval onto the receiving interval.
+        * */
+      void localCopyDirection(PdfField_T * pdfs_src, CellInterval & srcInterval, PdfField_T * pdfs_dst, CellInterval & dstInterval, stencil::Direction dir, gpuStream_t stream = nullptr) const;
+
+      /**
+       * Returns the number of bytes that will be packed from / unpacked to the cell interval
+       * when using packDirection / unpackDirection
+       * @param ci  The cell interval
+       * @param dir The communication direction
+       * @return    The required size of the buffer, in bytes
+       * */
+      uint_t size (CellInterval & ci, stencil::Direction dir) const {
+         return ci.numCells() * sizes[dir] * sizeof(value_type);
+      }
+
+      /**
+       * Returns the number of bytes that will be packed from / unpacked to the cell interval
+       * when using packAll / unpackAll
+       * @param ci  The cell interval
+       * @return    The required size of the buffer, in bytes
+       * */
+      uint_t size (CellInterval & ci) const {
+         return ci.numCells() * 19 * sizeof(value_type);
+      }
+
+      /**
+       * Unpacks and uniformly redistributes populations coming from a coarse block onto the fine grid.
+       * */
+      void unpackRedistribute(PdfField_T * pdfs_dst, CellInterval & ci, unsigned char * inBuffer, stencil::Direction dir, gpuStream_t stream = nullptr) const;
+
+      /**
+       * Partially coalesces and packs populations streaming from a fine block into a coarse block
+       * */
+      void packPartialCoalescence(PdfField_T * pdfs_src, MaskField_T * mask, CellInterval & ci, unsigned char * outBuffer, stencil::Direction dir, gpuStream_t stream = nullptr) const;
+
+      /**
+       * Prepares a coarse block for coalescence by setting every population that must be coalesced from fine blocks to zero.
+       * */
+      void zeroCoalescenceRegion(PdfField_T * pdfs_dst, CellInterval & ci, stencil::Direction dir, gpuStream_t stream = nullptr) const;
+
+      /**
+       * Unpacks and coalesces populations coming from a fine block onto the coarse grid
+       * */
+      void unpackCoalescence(PdfField_T * pdfs_dst, CellInterval & ci, unsigned char * inBuffer, stencil::Direction dir, gpuStream_t stream = nullptr) const;
+
+      /**
+       * Returns the number of bytes that will be unpacked to the cell interval
+       * when using unpackRedistribute. This is 2^{-d} of the data that would be
+       * unpacked during same-level communication (d = 3: computed as size(ci) >> 3).
+       * @param ci  The cell interval
+       * @return    The required size of the buffer, in bytes
+       * */
+      uint_t redistributeSize(CellInterval & ci) const {
+         return size(ci) >> 3;
+      }
+
+      /**
+       * Returns the number of bytes that will be packed from the cell interval
+       * when using packPartialCoalescence (computed as size(ci, dir) >> 3).
+       * @param ci  The cell interval
+       * @param dir The communication direction
+       * @return    The required size of the buffer, in bytes
+       * */
+      uint_t partialCoalescenceSize(CellInterval & ci, stencil::Direction dir) const {
+         return size(ci, dir) >> 3;
+      }
+
+    private:
+      const uint_t sizes[27] { 0, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 }; // values per cell for each direction, indexed by dir in size(ci, dir)
+   };
+
+};
+
+}} //lbm/walberla
\ No newline at end of file
diff --git a/apps/benchmarks/GridRefSphere/SweepCollection.cu b/apps/benchmarks/GridRefSphere/SweepCollection.cu
new file mode 100644
index 000000000..beede3153
--- /dev/null
+++ b/apps/benchmarks/GridRefSphere/SweepCollection.cu
@@ -0,0 +1,848 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file SweepCollection.cu
+//! \author pystencils
+//======================================================================================================================
+#include "SweepCollection.h"
+
+#define FUNC_PREFIX __global__
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic push
+#   pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_INTEL )
+#pragma warning push
+#pragma warning( disable :  1599 )
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace lbm {
+
+
+namespace internal_sweepcollection_kernel_streamCollide {
+static FUNC_PREFIX __launch_bounds__(256) void sweepcollection_kernel_streamCollide(double * RESTRICT const _data_pdfs, double * RESTRICT  _data_pdfs_tmp, double * RESTRICT  _data_vel, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t const _stride_pdfs_tmp_0, int64_t const _stride_pdfs_tmp_1, int64_t const _stride_pdfs_tmp_2, int64_t const _stride_pdfs_tmp_3, int64_t const _stride_vel_0, int64_t const _stride_vel_1, int64_t const _stride_vel_2, int64_t const _stride_vel_3, double omega)
+{
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
+   {
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
+      double * RESTRICT _data_pdfs_10_2m1_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_11_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_1m1_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_10_21_318 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_10_20_34 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
+      const double vel0Term = _data_pdfs_10_20_34[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+      double * RESTRICT _data_pdfs_1m1_2m1_311 = _data_pdfs + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_1m1_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_1m1_20_31 = _data_pdfs + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
+      double * RESTRICT _data_pdfs_1m1_21_315 = _data_pdfs + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3;
+      const double vel1Term = _data_pdfs_1m1_20_31[_stride_pdfs_0*ctr_0] + _data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0] + _data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0];
+      double * RESTRICT _data_pdfs_10_2m1_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_11_2m1_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_10_2m1_35 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3;
+      const double vel2Term = _data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_10_2m1_35[_stride_pdfs_0*ctr_0] + _data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0];
+      double * RESTRICT _data_pdfs_10_20_30 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2;
+      double * RESTRICT _data_pdfs_10_20_33 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_11_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_11_20_32 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_10_21_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_11_21_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_10_21_36 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3;
+      const double rho = vel0Term + vel1Term + vel2Term + _data_pdfs_10_20_30[_stride_pdfs_0*ctr_0] + _data_pdfs_10_20_33[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_10_21_36[_stride_pdfs_0*ctr_0] + _data_pdfs_11_20_32[_stride_pdfs_0*ctr_0] + _data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_11_21_316[_stride_pdfs_0*ctr_0];
+      const double u_0 = vel0Term - 1.0*_data_pdfs_10_20_33[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+      const double u_1 = vel1Term - 1.0*_data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_11_20_32[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0] + _data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+      const double u_2 = vel2Term - 1.0*_data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_21_36[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0] + _data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0];
+      const double delta_rho = rho - 1.0;
+      const double momdensity_0 = vel0Term - 1.0*_data_pdfs_10_20_33[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+      const double momdensity_1 = vel1Term - 1.0*_data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_11_20_32[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0] + _data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+      const double momdensity_2 = vel2Term - 1.0*_data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_21_36[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0] + _data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0];
+      const double xi_0 = momdensity_0;
+      const double xi_1 = momdensity_1;
+      const double xi_2 = momdensity_2;
+      double * RESTRICT  _data_pdfs_tmp_10_20_30 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2;
+      _data_pdfs_tmp_10_20_30[_stride_pdfs_tmp_0*ctr_0] = omega*(rho - 1.0*_data_pdfs_10_20_30[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_10_20_33[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_20_34[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_21_36[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_35[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_11_20_32[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_20_31[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0]) + omega*-1.0*(rho*0.33333333333333331 - 1.0*_data_pdfs_10_20_33[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_20_34[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + u_0*u_0) + omega*-1.0*(rho*0.33333333333333331 - 1.0*_data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 
1.0*_data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_21_36[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_35[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + u_2*u_2) + omega*-1.0*(rho*0.33333333333333331 - 1.0*_data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_11_20_32[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_20_31[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + u_1*u_1) + omega*(rho*0.1111111111111111 - 0.16666666666666666*(u_0*u_0) - 1.0*_data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_1*u_1) + 0.33333333333333331*(u_2*u_2)) + omega*(rho*0.1111111111111111 - 0.16666666666666666*(u_1*u_1) - 1.0*_data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_2*u_2)) + omega*(rho*0.1111111111111111 - 0.16666666666666666*(u_2*u_2) - 1.0*_data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 
1.0*_data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_1*u_1)) + _data_pdfs_10_20_30[_stride_pdfs_0*ctr_0];
+      double * RESTRICT  _data_pdfs_tmp_10_20_31 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_31[_stride_pdfs_tmp_0*ctr_0] = omega*(u_1*0.33333333333333331 - 1.0*_data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0])*-0.5 + omega*(u_1*0.33333333333333331 - 1.0*_data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + _data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] + _data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0])*-0.5 + omega*(u_1 - 1.0*_data_pdfs_1m1_20_31[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + _data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_11_20_32[_stride_pdfs_0*ctr_0] + _data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] + _data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0])*0.5 + omega*-0.5*(rho*0.1111111111111111 - 0.16666666666666666*(u_0*u_0) - 1.0*_data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_1*u_1) + 0.33333333333333331*(u_2*u_2)) + omega*-0.5*(rho*0.1111111111111111 - 0.16666666666666666*(u_2*u_2) - 1.0*_data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_1*u_1)) + omega*0.5*(rho*0.33333333333333331 - 1.0*_data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 
1.0*_data_pdfs_11_20_32[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_20_31[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + u_1*u_1) + _data_pdfs_1m1_20_31[_stride_pdfs_0*ctr_0];
+      double * RESTRICT  _data_pdfs_tmp_10_20_32 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_32[_stride_pdfs_tmp_0*ctr_0] = omega*(u_1*0.33333333333333331 - 1.0*_data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0])*0.5 + omega*(u_1*0.33333333333333331 - 1.0*_data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + _data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] + _data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0])*0.5 + omega*(u_1 - 1.0*_data_pdfs_1m1_20_31[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + _data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_11_20_32[_stride_pdfs_0*ctr_0] + _data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] + _data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0])*-0.5 + omega*-0.5*(rho*0.1111111111111111 - 0.16666666666666666*(u_0*u_0) - 1.0*_data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_1*u_1) + 0.33333333333333331*(u_2*u_2)) + omega*-0.5*(rho*0.1111111111111111 - 0.16666666666666666*(u_2*u_2) - 1.0*_data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_1*u_1)) + omega*0.5*(rho*0.33333333333333331 - 1.0*_data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 
1.0*_data_pdfs_11_20_32[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_20_31[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + u_1*u_1) + _data_pdfs_11_20_32[_stride_pdfs_0*ctr_0];
+      double * RESTRICT  _data_pdfs_tmp_10_20_33 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_33[_stride_pdfs_tmp_0*ctr_0] = omega*(u_0*0.33333333333333331 - 1.0*_data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0])*0.5 + omega*(u_0*0.33333333333333331 - 1.0*_data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0])*0.5 + omega*(u_0 - 1.0*_data_pdfs_10_20_34[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_10_20_33[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0])*-0.5 + omega*-0.5*(rho*0.1111111111111111 - 0.16666666666666666*(u_1*u_1) - 1.0*_data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_2*u_2)) + omega*-0.5*(rho*0.1111111111111111 - 0.16666666666666666*(u_2*u_2) - 1.0*_data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 
1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_1*u_1)) + omega*0.5*(rho*0.33333333333333331 - 1.0*_data_pdfs_10_20_33[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_20_34[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + u_0*u_0) + _data_pdfs_10_20_33[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+      double * RESTRICT  _data_pdfs_tmp_10_20_34 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_34[_stride_pdfs_tmp_0*ctr_0] = omega*(u_0*0.33333333333333331 - 1.0*_data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0])*-0.5 + omega*(u_0*0.33333333333333331 - 1.0*_data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0])*-0.5 + omega*(u_0 - 1.0*_data_pdfs_10_20_34[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_10_20_33[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0])*0.5 + omega*-0.5*(rho*0.1111111111111111 - 0.16666666666666666*(u_1*u_1) - 1.0*_data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_2*u_2)) + omega*-0.5*(rho*0.1111111111111111 - 0.16666666666666666*(u_2*u_2) - 1.0*_data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 
1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_1*u_1)) + omega*0.5*(rho*0.33333333333333331 - 1.0*_data_pdfs_10_20_33[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_20_34[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + u_0*u_0) + _data_pdfs_10_20_34[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+      double * RESTRICT  _data_pdfs_tmp_10_20_35 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_35[_stride_pdfs_tmp_0*ctr_0] = omega*(u_2*0.33333333333333331 - 1.0*_data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0])*-0.5 + omega*(u_2*0.33333333333333331 - 1.0*_data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + _data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] + _data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0])*-0.5 + omega*(u_2 - 1.0*_data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_35[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + _data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_10_21_36[_stride_pdfs_0*ctr_0] + _data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] + _data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0])*0.5 + omega*-0.5*(rho*0.1111111111111111 - 0.16666666666666666*(u_0*u_0) - 1.0*_data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_1*u_1) + 0.33333333333333331*(u_2*u_2)) + omega*-0.5*(rho*0.1111111111111111 - 0.16666666666666666*(u_1*u_1) - 1.0*_data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_2*u_2)) + omega*0.5*(rho*0.33333333333333331 - 1.0*_data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 
1.0*_data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_21_36[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_35[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + u_2*u_2) + _data_pdfs_10_2m1_35[_stride_pdfs_0*ctr_0];
+      double * RESTRICT  _data_pdfs_tmp_10_20_36 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_36[_stride_pdfs_tmp_0*ctr_0] = omega*(u_2*0.33333333333333331 - 1.0*_data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0])*0.5 + omega*(u_2*0.33333333333333331 - 1.0*_data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + _data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] + _data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0])*0.5 + omega*(u_2 - 1.0*_data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_35[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + _data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_10_21_36[_stride_pdfs_0*ctr_0] + _data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] + _data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0])*-0.5 + omega*-0.5*(rho*0.1111111111111111 - 0.16666666666666666*(u_0*u_0) - 1.0*_data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_1*u_1) + 0.33333333333333331*(u_2*u_2)) + omega*-0.5*(rho*0.1111111111111111 - 0.16666666666666666*(u_1*u_1) - 1.0*_data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_2*u_2)) + omega*0.5*(rho*0.33333333333333331 - 1.0*_data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 
1.0*_data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_21_36[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_35[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + u_2*u_2) + _data_pdfs_10_21_36[_stride_pdfs_0*ctr_0];
+      double * RESTRICT  _data_pdfs_tmp_10_20_37 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_37[_stride_pdfs_tmp_0*ctr_0] = omega*(u_0*u_1 - 1.0*_data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0])*-0.25 + omega*(u_0*0.33333333333333331 - 1.0*_data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0])*-0.25 + omega*(u_1*0.33333333333333331 - 1.0*_data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0])*0.25 + omega*0.25*(rho*0.1111111111111111 - 0.16666666666666666*(u_2*u_2) - 1.0*_data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_1*u_1)) + _data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+      double * RESTRICT  _data_pdfs_tmp_10_20_38 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_38[_stride_pdfs_tmp_0*ctr_0] = omega*(u_0*u_1 - 1.0*_data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0])*0.25 + omega*(u_0*0.33333333333333331 - 1.0*_data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0])*0.25 + omega*(u_1*0.33333333333333331 - 1.0*_data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0])*0.25 + omega*0.25*(rho*0.1111111111111111 - 0.16666666666666666*(u_2*u_2) - 1.0*_data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_1*u_1)) + _data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+      double * RESTRICT  _data_pdfs_tmp_10_20_39 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_39[_stride_pdfs_tmp_0*ctr_0] = omega*(u_0*u_1 - 1.0*_data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0])*0.25 + omega*(u_0*0.33333333333333331 - 1.0*_data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0])*-0.25 + omega*(u_1*0.33333333333333331 - 1.0*_data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0])*-0.25 + omega*0.25*(rho*0.1111111111111111 - 0.16666666666666666*(u_2*u_2) - 1.0*_data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_1*u_1)) + _data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+      double * RESTRICT  _data_pdfs_tmp_10_20_310 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_310[_stride_pdfs_tmp_0*ctr_0] = omega*(u_0*u_1 - 1.0*_data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0])*-0.25 + omega*(u_0*0.33333333333333331 - 1.0*_data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0])*0.25 + omega*(u_1*0.33333333333333331 - 1.0*_data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0])*-0.25 + omega*0.25*(rho*0.1111111111111111 - 0.16666666666666666*(u_2*u_2) - 1.0*_data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_1*u_1)) + _data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+      double * RESTRICT  _data_pdfs_tmp_10_20_311 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_311[_stride_pdfs_tmp_0*ctr_0] = omega*(u_1*u_2 - 1.0*_data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + _data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0] + _data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0])*0.25 + omega*(u_1*0.33333333333333331 - 1.0*_data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + _data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] + _data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0])*0.25 + omega*(u_2*0.33333333333333331 - 1.0*_data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + _data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] + _data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0])*0.25 + omega*0.25*(rho*0.1111111111111111 - 0.16666666666666666*(u_0*u_0) - 1.0*_data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_1*u_1) + 0.33333333333333331*(u_2*u_2)) + _data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0];
+      double * RESTRICT  _data_pdfs_tmp_10_20_312 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_312[_stride_pdfs_tmp_0*ctr_0] = omega*(u_1*u_2 - 1.0*_data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + _data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0] + _data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0])*-0.25 + omega*(u_1*0.33333333333333331 - 1.0*_data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + _data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] + _data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0])*-0.25 + omega*(u_2*0.33333333333333331 - 1.0*_data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + _data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] + _data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0])*0.25 + omega*0.25*(rho*0.1111111111111111 - 0.16666666666666666*(u_0*u_0) - 1.0*_data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_1*u_1) + 0.33333333333333331*(u_2*u_2)) + _data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0];
+      double * RESTRICT  _data_pdfs_tmp_10_20_313 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_313[_stride_pdfs_tmp_0*ctr_0] = omega*(u_0*u_2 - 1.0*_data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0])*-0.25 + omega*(u_0*0.33333333333333331 - 1.0*_data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0])*-0.25 + omega*(u_2*0.33333333333333331 - 1.0*_data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0])*0.25 + omega*0.25*(rho*0.1111111111111111 - 0.16666666666666666*(u_1*u_1) - 1.0*_data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_2*u_2)) + _data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+      double * RESTRICT  _data_pdfs_tmp_10_20_314 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_314[_stride_pdfs_tmp_0*ctr_0] = omega*(u_0*u_2 - 1.0*_data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0])*0.25 + omega*(u_0*0.33333333333333331 - 1.0*_data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0])*0.25 + omega*(u_2*0.33333333333333331 - 1.0*_data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0])*0.25 + omega*0.25*(rho*0.1111111111111111 - 0.16666666666666666*(u_1*u_1) - 1.0*_data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_2*u_2)) + _data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+      double * RESTRICT  _data_pdfs_tmp_10_20_315 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_315[_stride_pdfs_tmp_0*ctr_0] = omega*(u_1*u_2 - 1.0*_data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + _data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0] + _data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0])*-0.25 + omega*(u_1*0.33333333333333331 - 1.0*_data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + _data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] + _data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0])*0.25 + omega*(u_2*0.33333333333333331 - 1.0*_data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + _data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] + _data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0])*-0.25 + omega*0.25*(rho*0.1111111111111111 - 0.16666666666666666*(u_0*u_0) - 1.0*_data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_1*u_1) + 0.33333333333333331*(u_2*u_2)) + _data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0];
+      double * RESTRICT  _data_pdfs_tmp_10_20_316 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_316[_stride_pdfs_tmp_0*ctr_0] = omega*(u_1*u_2 - 1.0*_data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + _data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0] + _data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0])*0.25 + omega*(u_1*0.33333333333333331 - 1.0*_data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + _data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] + _data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0])*-0.25 + omega*(u_2*0.33333333333333331 - 1.0*_data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + _data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] + _data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0])*-0.25 + omega*0.25*(rho*0.1111111111111111 - 0.16666666666666666*(u_0*u_0) - 1.0*_data_pdfs_11_21_316[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_1*u_1) + 0.33333333333333331*(u_2*u_2)) + _data_pdfs_11_21_316[_stride_pdfs_0*ctr_0];
+      double * RESTRICT  _data_pdfs_tmp_10_20_317 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_317[_stride_pdfs_tmp_0*ctr_0] = omega*(u_0*u_2 - 1.0*_data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0])*0.25 + omega*(u_0*0.33333333333333331 - 1.0*_data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0])*-0.25 + omega*(u_2*0.33333333333333331 - 1.0*_data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0])*-0.25 + omega*0.25*(rho*0.1111111111111111 - 0.16666666666666666*(u_1*u_1) - 1.0*_data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_2*u_2)) + _data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+      double * RESTRICT  _data_pdfs_tmp_10_20_318 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_318[_stride_pdfs_tmp_0*ctr_0] = omega*(u_0*u_2 - 1.0*_data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0])*-0.25 + omega*(u_0*0.33333333333333331 - 1.0*_data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0])*0.25 + omega*(u_2*0.33333333333333331 - 1.0*_data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0])*-0.25 + omega*0.25*(rho*0.1111111111111111 - 0.16666666666666666*(u_1*u_1) - 1.0*_data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_2*u_2)) + _data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+      double * RESTRICT  _data_vel_10_20_30 = _data_vel + _stride_vel_1*ctr_1 + _stride_vel_2*ctr_2;
+      _data_vel_10_20_30[_stride_vel_0*ctr_0] = xi_0;
+      double * RESTRICT  _data_vel_10_20_31 = _data_vel + _stride_vel_1*ctr_1 + _stride_vel_2*ctr_2 + _stride_vel_3;
+      _data_vel_10_20_31[_stride_vel_0*ctr_0] = xi_1;
+      double * RESTRICT  _data_vel_10_20_32 = _data_vel + _stride_vel_1*ctr_1 + _stride_vel_2*ctr_2 + 2*_stride_vel_3;
+      _data_vel_10_20_32[_stride_vel_0*ctr_0] = xi_2;
+   } 
+}
+}
+
+
+namespace internal_sweepcollection_kernel_collide { // In-place, cell-local update kernel: loads 19 values per cell from _data_pdfs, adds omega-scaled correction terms, writes them back to the same slots, and stores three signed sums to _data_vel.
+static FUNC_PREFIX __launch_bounds__(256) void sweepcollection_kernel_collide(double * RESTRICT  _data_pdfs, double * RESTRICT  _data_vel, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t const _stride_vel_0, int64_t const _stride_vel_1, int64_t const _stride_vel_2, int64_t const _stride_vel_3, double omega)
+{ // NOTE(review): presumably the collision step of a D3Q19 lattice Boltzmann scheme (generated code) -- inferred from the kernel name and the 19-slot pattern; confirm against the generator.
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) // threads outside the _size_pdfs_* extents do nothing
+   {
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // ctr_0..ctr_2: this thread's cell index along each axis
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
+      double * RESTRICT  _data_pdfs_10_20_318 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3; // slot q of the cell sits q*_stride_pdfs_3 away; xi_3..xi_21 cache all 19 slots (load order is not slot order)
+      const double xi_3 = _data_pdfs_10_20_318[_stride_pdfs_0*ctr_0];
+      double * RESTRICT  _data_pdfs_10_20_31 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
+      const double xi_4 = _data_pdfs_10_20_31[_stride_pdfs_0*ctr_0];
+      double * RESTRICT  _data_pdfs_10_20_30 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2;
+      const double xi_5 = _data_pdfs_10_20_30[_stride_pdfs_0*ctr_0];
+      double * RESTRICT  _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
+      const double xi_6 = _data_pdfs_10_20_312[_stride_pdfs_0*ctr_0];
+      double * RESTRICT  _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
+      const double xi_7 = _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0];
+      double * RESTRICT  _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
+      const double xi_8 = _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0];
+      double * RESTRICT  _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
+      const double xi_9 = _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0];
+      double * RESTRICT  _data_pdfs_10_20_35 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
+      const double xi_10 = _data_pdfs_10_20_35[_stride_pdfs_0*ctr_0];
+      double * RESTRICT  _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
+      const double xi_11 = _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0];
+      double * RESTRICT  _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
+      const double xi_12 = _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0];
+      double * RESTRICT  _data_pdfs_10_20_36 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3;
+      const double xi_13 = _data_pdfs_10_20_36[_stride_pdfs_0*ctr_0];
+      double * RESTRICT  _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
+      const double xi_14 = _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0];
+      double * RESTRICT  _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
+      const double xi_15 = _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0];
+      double * RESTRICT  _data_pdfs_10_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
+      const double xi_16 = _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0];
+      double * RESTRICT  _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
+      const double xi_17 = _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0];
+      double * RESTRICT  _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
+      const double xi_18 = _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0];
+      double * RESTRICT  _data_pdfs_10_20_32 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
+      const double xi_19 = _data_pdfs_10_20_32[_stride_pdfs_0*ctr_0];
+      double * RESTRICT  _data_pdfs_10_20_33 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
+      const double xi_20 = _data_pdfs_10_20_33[_stride_pdfs_0*ctr_0];
+      double * RESTRICT  _data_pdfs_10_20_34 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
+      const double xi_21 = _data_pdfs_10_20_34[_stride_pdfs_0*ctr_0];
+      const double vel0Term = xi_14 + xi_17 + xi_18 + xi_21 + xi_3; // vel*Term: partial sums shared by rho and by the matching u_*/momdensity_* component
+      const double vel1Term = xi_12 + xi_15 + xi_16 + xi_4;
+      const double vel2Term = xi_10 + xi_6 + xi_7;
+      const double rho = vel0Term + vel1Term + vel2Term + xi_11 + xi_13 + xi_19 + xi_20 + xi_5 + xi_8 + xi_9; // sum of all 19 loaded values
+      const double u_0 = vel0Term + xi_16*-1.0 + xi_20*-1.0 + xi_7*-1.0 + xi_8*-1.0 + xi_9*-1.0; // u_0..u_2: signed sums of the loaded values; note there is no division by rho
+      const double u_1 = vel1Term + xi_11*-1.0 + xi_14*-1.0 + xi_18 + xi_19*-1.0 + xi_6*-1.0 + xi_9*-1.0;
+      const double u_2 = vel2Term + xi_11*-1.0 + xi_12 + xi_13*-1.0 + xi_15*-1.0 + xi_17 + xi_3*-1.0 + xi_8*-1.0;
+      const double delta_rho = rho - 1.0; // computed but not referenced again in this kernel
+      const double momdensity_0 = vel0Term + xi_16*-1.0 + xi_20*-1.0 + xi_7*-1.0 + xi_8*-1.0 + xi_9*-1.0; // momdensity_0..2 repeat the expressions of u_0..u_2 above
+      const double momdensity_1 = vel1Term + xi_11*-1.0 + xi_14*-1.0 + xi_18 + xi_19*-1.0 + xi_6*-1.0 + xi_9*-1.0;
+      const double momdensity_2 = vel2Term + xi_11*-1.0 + xi_12 + xi_13*-1.0 + xi_15*-1.0 + xi_17 + xi_3*-1.0 + xi_8*-1.0;
+      const double xi_0 = momdensity_0; // xi_0..xi_2 are the values written to _data_vel at the end
+      const double xi_1 = momdensity_1;
+      const double xi_2 = momdensity_2;
+      _data_pdfs_10_20_30[_stride_pdfs_0*ctr_0] = omega*(rho + xi_10*-1.0 + xi_11*-1.0 + xi_12*-1.0 + xi_13*-1.0 + xi_14*-1.0 + xi_15*-1.0 + xi_16*-1.0 + xi_17*-1.0 + xi_18*-1.0 + xi_19*-1.0 + xi_20*-1.0 + xi_21*-1.0 + xi_3*-1.0 + xi_4*-1.0 + xi_5*-1.0 + xi_6*-1.0 + xi_7*-1.0 + xi_8*-1.0 + xi_9*-1.0) + omega*-1.0*(rho*0.33333333333333331 + xi_10*-1.0 + xi_11*-1.0 + xi_12*-1.0 + xi_13*-1.0 + xi_15*-1.0 + xi_17*-1.0 + xi_3*-1.0 + xi_6*-1.0 + xi_7*-1.0 + xi_8*-1.0 + u_2*u_2) + omega*-1.0*(rho*0.33333333333333331 + xi_11*-1.0 + xi_12*-1.0 + xi_14*-1.0 + xi_15*-1.0 + xi_16*-1.0 + xi_18*-1.0 + xi_19*-1.0 + xi_4*-1.0 + xi_6*-1.0 + xi_9*-1.0 + u_1*u_1) + omega*-1.0*(rho*0.33333333333333331 + xi_14*-1.0 + xi_16*-1.0 + xi_17*-1.0 + xi_18*-1.0 + xi_20*-1.0 + xi_21*-1.0 + xi_3*-1.0 + xi_7*-1.0 + xi_8*-1.0 + xi_9*-1.0 + u_0*u_0) + omega*(rho*0.1111111111111111 + xi_11*-1.0 + xi_12*-1.0 + xi_15*-1.0 + xi_6*-1.0 - 0.16666666666666666*(u_0*u_0) + 0.33333333333333331*(u_1*u_1) + 0.33333333333333331*(u_2*u_2)) + omega*(rho*0.1111111111111111 + xi_14*-1.0 + xi_16*-1.0 + xi_18*-1.0 + xi_9*-1.0 - 0.16666666666666666*(u_2*u_2) + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_1*u_1)) + omega*(rho*0.1111111111111111 + xi_17*-1.0 + xi_3*-1.0 + xi_7*-1.0 + xi_8*-1.0 - 0.16666666666666666*(u_1*u_1) + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_2*u_2)) + xi_5; // from here on: each slot is rewritten as its old value plus omega-scaled terms built from rho, u_* and the loaded values
+      _data_pdfs_10_20_31[_stride_pdfs_0*ctr_0] = omega*(u_1*0.33333333333333331 + xi_11 + xi_12*-1.0 + xi_15*-1.0 + xi_6)*-0.5 + omega*(u_1*0.33333333333333331 + xi_14 + xi_16*-1.0 + xi_18*-1.0 + xi_9)*-0.5 + omega*(u_1 + xi_11 + xi_12*-1.0 + xi_14 + xi_15*-1.0 + xi_16*-1.0 + xi_18*-1.0 + xi_19 + xi_4*-1.0 + xi_6 + xi_9)*0.5 + omega*-0.5*(rho*0.1111111111111111 + xi_11*-1.0 + xi_12*-1.0 + xi_15*-1.0 + xi_6*-1.0 - 0.16666666666666666*(u_0*u_0) + 0.33333333333333331*(u_1*u_1) + 0.33333333333333331*(u_2*u_2)) + omega*-0.5*(rho*0.1111111111111111 + xi_14*-1.0 + xi_16*-1.0 + xi_18*-1.0 + xi_9*-1.0 - 0.16666666666666666*(u_2*u_2) + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_1*u_1)) + omega*0.5*(rho*0.33333333333333331 + xi_11*-1.0 + xi_12*-1.0 + xi_14*-1.0 + xi_15*-1.0 + xi_16*-1.0 + xi_18*-1.0 + xi_19*-1.0 + xi_4*-1.0 + xi_6*-1.0 + xi_9*-1.0 + u_1*u_1) + xi_4;
+      _data_pdfs_10_20_32[_stride_pdfs_0*ctr_0] = omega*(u_1*0.33333333333333331 + xi_11 + xi_12*-1.0 + xi_15*-1.0 + xi_6)*0.5 + omega*(u_1*0.33333333333333331 + xi_14 + xi_16*-1.0 + xi_18*-1.0 + xi_9)*0.5 + omega*(u_1 + xi_11 + xi_12*-1.0 + xi_14 + xi_15*-1.0 + xi_16*-1.0 + xi_18*-1.0 + xi_19 + xi_4*-1.0 + xi_6 + xi_9)*-0.5 + omega*-0.5*(rho*0.1111111111111111 + xi_11*-1.0 + xi_12*-1.0 + xi_15*-1.0 + xi_6*-1.0 - 0.16666666666666666*(u_0*u_0) + 0.33333333333333331*(u_1*u_1) + 0.33333333333333331*(u_2*u_2)) + omega*-0.5*(rho*0.1111111111111111 + xi_14*-1.0 + xi_16*-1.0 + xi_18*-1.0 + xi_9*-1.0 - 0.16666666666666666*(u_2*u_2) + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_1*u_1)) + omega*0.5*(rho*0.33333333333333331 + xi_11*-1.0 + xi_12*-1.0 + xi_14*-1.0 + xi_15*-1.0 + xi_16*-1.0 + xi_18*-1.0 + xi_19*-1.0 + xi_4*-1.0 + xi_6*-1.0 + xi_9*-1.0 + u_1*u_1) + xi_19;
+      _data_pdfs_10_20_33[_stride_pdfs_0*ctr_0] = omega*(u_0*0.33333333333333331 + xi_14*-1.0 + xi_16 + xi_18*-1.0 + xi_9)*0.5 + omega*(u_0*0.33333333333333331 + xi_17*-1.0 + xi_3*-1.0 + xi_7 + xi_8)*0.5 + omega*(u_0 + xi_14*-1.0 + xi_16 + xi_17*-1.0 + xi_18*-1.0 + xi_20 + xi_21*-1.0 + xi_3*-1.0 + xi_7 + xi_8 + xi_9)*-0.5 + omega*-0.5*(rho*0.1111111111111111 + xi_14*-1.0 + xi_16*-1.0 + xi_18*-1.0 + xi_9*-1.0 - 0.16666666666666666*(u_2*u_2) + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_1*u_1)) + omega*-0.5*(rho*0.1111111111111111 + xi_17*-1.0 + xi_3*-1.0 + xi_7*-1.0 + xi_8*-1.0 - 0.16666666666666666*(u_1*u_1) + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_2*u_2)) + omega*0.5*(rho*0.33333333333333331 + xi_14*-1.0 + xi_16*-1.0 + xi_17*-1.0 + xi_18*-1.0 + xi_20*-1.0 + xi_21*-1.0 + xi_3*-1.0 + xi_7*-1.0 + xi_8*-1.0 + xi_9*-1.0 + u_0*u_0) + xi_20;
+      _data_pdfs_10_20_34[_stride_pdfs_0*ctr_0] = omega*(u_0*0.33333333333333331 + xi_14*-1.0 + xi_16 + xi_18*-1.0 + xi_9)*-0.5 + omega*(u_0*0.33333333333333331 + xi_17*-1.0 + xi_3*-1.0 + xi_7 + xi_8)*-0.5 + omega*(u_0 + xi_14*-1.0 + xi_16 + xi_17*-1.0 + xi_18*-1.0 + xi_20 + xi_21*-1.0 + xi_3*-1.0 + xi_7 + xi_8 + xi_9)*0.5 + omega*-0.5*(rho*0.1111111111111111 + xi_14*-1.0 + xi_16*-1.0 + xi_18*-1.0 + xi_9*-1.0 - 0.16666666666666666*(u_2*u_2) + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_1*u_1)) + omega*-0.5*(rho*0.1111111111111111 + xi_17*-1.0 + xi_3*-1.0 + xi_7*-1.0 + xi_8*-1.0 - 0.16666666666666666*(u_1*u_1) + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_2*u_2)) + omega*0.5*(rho*0.33333333333333331 + xi_14*-1.0 + xi_16*-1.0 + xi_17*-1.0 + xi_18*-1.0 + xi_20*-1.0 + xi_21*-1.0 + xi_3*-1.0 + xi_7*-1.0 + xi_8*-1.0 + xi_9*-1.0 + u_0*u_0) + xi_21;
+      _data_pdfs_10_20_35[_stride_pdfs_0*ctr_0] = omega*(u_2*0.33333333333333331 + xi_11 + xi_12*-1.0 + xi_15 + xi_6*-1.0)*-0.5 + omega*(u_2*0.33333333333333331 + xi_17*-1.0 + xi_3 + xi_7*-1.0 + xi_8)*-0.5 + omega*(u_2 + xi_10*-1.0 + xi_11 + xi_12*-1.0 + xi_13 + xi_15 + xi_17*-1.0 + xi_3 + xi_6*-1.0 + xi_7*-1.0 + xi_8)*0.5 + omega*-0.5*(rho*0.1111111111111111 + xi_11*-1.0 + xi_12*-1.0 + xi_15*-1.0 + xi_6*-1.0 - 0.16666666666666666*(u_0*u_0) + 0.33333333333333331*(u_1*u_1) + 0.33333333333333331*(u_2*u_2)) + omega*-0.5*(rho*0.1111111111111111 + xi_17*-1.0 + xi_3*-1.0 + xi_7*-1.0 + xi_8*-1.0 - 0.16666666666666666*(u_1*u_1) + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_2*u_2)) + omega*0.5*(rho*0.33333333333333331 + xi_10*-1.0 + xi_11*-1.0 + xi_12*-1.0 + xi_13*-1.0 + xi_15*-1.0 + xi_17*-1.0 + xi_3*-1.0 + xi_6*-1.0 + xi_7*-1.0 + xi_8*-1.0 + u_2*u_2) + xi_10;
+      _data_pdfs_10_20_36[_stride_pdfs_0*ctr_0] = omega*(u_2*0.33333333333333331 + xi_11 + xi_12*-1.0 + xi_15 + xi_6*-1.0)*0.5 + omega*(u_2*0.33333333333333331 + xi_17*-1.0 + xi_3 + xi_7*-1.0 + xi_8)*0.5 + omega*(u_2 + xi_10*-1.0 + xi_11 + xi_12*-1.0 + xi_13 + xi_15 + xi_17*-1.0 + xi_3 + xi_6*-1.0 + xi_7*-1.0 + xi_8)*-0.5 + omega*-0.5*(rho*0.1111111111111111 + xi_11*-1.0 + xi_12*-1.0 + xi_15*-1.0 + xi_6*-1.0 - 0.16666666666666666*(u_0*u_0) + 0.33333333333333331*(u_1*u_1) + 0.33333333333333331*(u_2*u_2)) + omega*-0.5*(rho*0.1111111111111111 + xi_17*-1.0 + xi_3*-1.0 + xi_7*-1.0 + xi_8*-1.0 - 0.16666666666666666*(u_1*u_1) + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_2*u_2)) + omega*0.5*(rho*0.33333333333333331 + xi_10*-1.0 + xi_11*-1.0 + xi_12*-1.0 + xi_13*-1.0 + xi_15*-1.0 + xi_17*-1.0 + xi_3*-1.0 + xi_6*-1.0 + xi_7*-1.0 + xi_8*-1.0 + u_2*u_2) + xi_13;
+      _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0] = omega*(u_0*u_1 + xi_14 + xi_16 + xi_18*-1.0 + xi_9*-1.0)*-0.25 + omega*(u_0*0.33333333333333331 + xi_14*-1.0 + xi_16 + xi_18*-1.0 + xi_9)*-0.25 + omega*(u_1*0.33333333333333331 + xi_14 + xi_16*-1.0 + xi_18*-1.0 + xi_9)*0.25 + omega*0.25*(rho*0.1111111111111111 + xi_14*-1.0 + xi_16*-1.0 + xi_18*-1.0 + xi_9*-1.0 - 0.16666666666666666*(u_2*u_2) + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_1*u_1)) + xi_16;
+      _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0] = omega*(u_0*u_1 + xi_14 + xi_16 + xi_18*-1.0 + xi_9*-1.0)*0.25 + omega*(u_0*0.33333333333333331 + xi_14*-1.0 + xi_16 + xi_18*-1.0 + xi_9)*0.25 + omega*(u_1*0.33333333333333331 + xi_14 + xi_16*-1.0 + xi_18*-1.0 + xi_9)*0.25 + omega*0.25*(rho*0.1111111111111111 + xi_14*-1.0 + xi_16*-1.0 + xi_18*-1.0 + xi_9*-1.0 - 0.16666666666666666*(u_2*u_2) + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_1*u_1)) + xi_18;
+      _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0] = omega*(u_0*u_1 + xi_14 + xi_16 + xi_18*-1.0 + xi_9*-1.0)*0.25 + omega*(u_0*0.33333333333333331 + xi_14*-1.0 + xi_16 + xi_18*-1.0 + xi_9)*-0.25 + omega*(u_1*0.33333333333333331 + xi_14 + xi_16*-1.0 + xi_18*-1.0 + xi_9)*-0.25 + omega*0.25*(rho*0.1111111111111111 + xi_14*-1.0 + xi_16*-1.0 + xi_18*-1.0 + xi_9*-1.0 - 0.16666666666666666*(u_2*u_2) + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_1*u_1)) + xi_9;
+      _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0] = omega*(u_0*u_1 + xi_14 + xi_16 + xi_18*-1.0 + xi_9*-1.0)*-0.25 + omega*(u_0*0.33333333333333331 + xi_14*-1.0 + xi_16 + xi_18*-1.0 + xi_9)*0.25 + omega*(u_1*0.33333333333333331 + xi_14 + xi_16*-1.0 + xi_18*-1.0 + xi_9)*-0.25 + omega*0.25*(rho*0.1111111111111111 + xi_14*-1.0 + xi_16*-1.0 + xi_18*-1.0 + xi_9*-1.0 - 0.16666666666666666*(u_2*u_2) + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_1*u_1)) + xi_14;
+      _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0] = omega*(u_1*u_2 + xi_11*-1.0 + xi_12*-1.0 + xi_15 + xi_6)*0.25 + omega*(u_1*0.33333333333333331 + xi_11 + xi_12*-1.0 + xi_15*-1.0 + xi_6)*0.25 + omega*(u_2*0.33333333333333331 + xi_11 + xi_12*-1.0 + xi_15 + xi_6*-1.0)*0.25 + omega*0.25*(rho*0.1111111111111111 + xi_11*-1.0 + xi_12*-1.0 + xi_15*-1.0 + xi_6*-1.0 - 0.16666666666666666*(u_0*u_0) + 0.33333333333333331*(u_1*u_1) + 0.33333333333333331*(u_2*u_2)) + xi_12;
+      _data_pdfs_10_20_312[_stride_pdfs_0*ctr_0] = omega*(u_1*u_2 + xi_11*-1.0 + xi_12*-1.0 + xi_15 + xi_6)*-0.25 + omega*(u_1*0.33333333333333331 + xi_11 + xi_12*-1.0 + xi_15*-1.0 + xi_6)*-0.25 + omega*(u_2*0.33333333333333331 + xi_11 + xi_12*-1.0 + xi_15 + xi_6*-1.0)*0.25 + omega*0.25*(rho*0.1111111111111111 + xi_11*-1.0 + xi_12*-1.0 + xi_15*-1.0 + xi_6*-1.0 - 0.16666666666666666*(u_0*u_0) + 0.33333333333333331*(u_1*u_1) + 0.33333333333333331*(u_2*u_2)) + xi_6;
+      _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0] = omega*(u_0*u_2 + xi_17*-1.0 + xi_3 + xi_7 + xi_8*-1.0)*-0.25 + omega*(u_0*0.33333333333333331 + xi_17*-1.0 + xi_3*-1.0 + xi_7 + xi_8)*-0.25 + omega*(u_2*0.33333333333333331 + xi_17*-1.0 + xi_3 + xi_7*-1.0 + xi_8)*0.25 + omega*0.25*(rho*0.1111111111111111 + xi_17*-1.0 + xi_3*-1.0 + xi_7*-1.0 + xi_8*-1.0 - 0.16666666666666666*(u_1*u_1) + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_2*u_2)) + xi_7;
+      _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0] = omega*(u_0*u_2 + xi_17*-1.0 + xi_3 + xi_7 + xi_8*-1.0)*0.25 + omega*(u_0*0.33333333333333331 + xi_17*-1.0 + xi_3*-1.0 + xi_7 + xi_8)*0.25 + omega*(u_2*0.33333333333333331 + xi_17*-1.0 + xi_3 + xi_7*-1.0 + xi_8)*0.25 + omega*0.25*(rho*0.1111111111111111 + xi_17*-1.0 + xi_3*-1.0 + xi_7*-1.0 + xi_8*-1.0 - 0.16666666666666666*(u_1*u_1) + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_2*u_2)) + xi_17;
+      _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0] = omega*(u_1*u_2 + xi_11*-1.0 + xi_12*-1.0 + xi_15 + xi_6)*-0.25 + omega*(u_1*0.33333333333333331 + xi_11 + xi_12*-1.0 + xi_15*-1.0 + xi_6)*0.25 + omega*(u_2*0.33333333333333331 + xi_11 + xi_12*-1.0 + xi_15 + xi_6*-1.0)*-0.25 + omega*0.25*(rho*0.1111111111111111 + xi_11*-1.0 + xi_12*-1.0 + xi_15*-1.0 + xi_6*-1.0 - 0.16666666666666666*(u_0*u_0) + 0.33333333333333331*(u_1*u_1) + 0.33333333333333331*(u_2*u_2)) + xi_15;
+      _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0] = omega*(u_1*u_2 + xi_11*-1.0 + xi_12*-1.0 + xi_15 + xi_6)*0.25 + omega*(u_1*0.33333333333333331 + xi_11 + xi_12*-1.0 + xi_15*-1.0 + xi_6)*-0.25 + omega*(u_2*0.33333333333333331 + xi_11 + xi_12*-1.0 + xi_15 + xi_6*-1.0)*-0.25 + omega*0.25*(rho*0.1111111111111111 + xi_11*-1.0 + xi_12*-1.0 + xi_15*-1.0 + xi_6*-1.0 - 0.16666666666666666*(u_0*u_0) + 0.33333333333333331*(u_1*u_1) + 0.33333333333333331*(u_2*u_2)) + xi_11;
+      _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0] = omega*(u_0*u_2 + xi_17*-1.0 + xi_3 + xi_7 + xi_8*-1.0)*0.25 + omega*(u_0*0.33333333333333331 + xi_17*-1.0 + xi_3*-1.0 + xi_7 + xi_8)*-0.25 + omega*(u_2*0.33333333333333331 + xi_17*-1.0 + xi_3 + xi_7*-1.0 + xi_8)*-0.25 + omega*0.25*(rho*0.1111111111111111 + xi_17*-1.0 + xi_3*-1.0 + xi_7*-1.0 + xi_8*-1.0 - 0.16666666666666666*(u_1*u_1) + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_2*u_2)) + xi_8;
+      _data_pdfs_10_20_318[_stride_pdfs_0*ctr_0] = omega*(u_0*u_2 + xi_17*-1.0 + xi_3 + xi_7 + xi_8*-1.0)*-0.25 + omega*(u_0*0.33333333333333331 + xi_17*-1.0 + xi_3*-1.0 + xi_7 + xi_8)*0.25 + omega*(u_2*0.33333333333333331 + xi_17*-1.0 + xi_3 + xi_7*-1.0 + xi_8)*-0.25 + omega*0.25*(rho*0.1111111111111111 + xi_17*-1.0 + xi_3*-1.0 + xi_7*-1.0 + xi_8*-1.0 - 0.16666666666666666*(u_1*u_1) + 0.33333333333333331*(u_0*u_0) + 0.33333333333333331*(u_2*u_2)) + xi_3;
+      double * RESTRICT  _data_vel_10_20_30 = _data_vel + _stride_vel_1*ctr_1 + _stride_vel_2*ctr_2; // _data_vel receives three components per cell, spaced _stride_vel_3 apart
+      _data_vel_10_20_30[_stride_vel_0*ctr_0] = xi_0;
+      double * RESTRICT  _data_vel_10_20_31 = _data_vel + _stride_vel_1*ctr_1 + _stride_vel_2*ctr_2 + _stride_vel_3;
+      _data_vel_10_20_31[_stride_vel_0*ctr_0] = xi_1;
+      double * RESTRICT  _data_vel_10_20_32 = _data_vel + _stride_vel_1*ctr_1 + _stride_vel_2*ctr_2 + 2*_stride_vel_3;
+      _data_vel_10_20_32[_stride_vel_0*ctr_0] = xi_2;
+   } 
+}
+}
+
+
+namespace internal_sweepcollection_kernel_stream {
+// Streaming-only kernel (pure copy, no arithmetic on the values): each in-range
+// thread handles the cell (ctr_0, ctr_1, ctr_2). For every slot q = 0..18 along
+// the 4th stride it loads one value from `_data_pdfs` -- slot 0 from the cell
+// itself, slots 1..18 from a neighbour shifted by one cell along one or two
+// axes -- and stores it into slot q of the same cell in `_data_pdfs_tmp`.
+//
+//   _data_pdfs             source field, only read here
+//   _data_pdfs_tmp         destination field, only written here
+//   _size_pdfs_0/1/2       extents used for the per-thread bounds check
+//   _stride_pdfs_0..3      element strides of the source (axes 0, 1, 2, slot)
+//   _stride_pdfs_tmp_0..3  element strides of the destination
+//
+// Declared with __launch_bounds__(256): launches must not use more than 256
+// threads per block.
+//
+// NOTE(review): reads reach one cell beyond [0, _size_pdfs_*) in every
+// direction, so the source presumably carries a ghost layer -- confirm at the
+// call site. Looks like the pull-streaming step of a D3Q19 lattice Boltzmann
+// scheme; that is inferred from the access pattern, not stated in this file.
+static FUNC_PREFIX __launch_bounds__(256) void sweepcollection_kernel_stream(double * RESTRICT const _data_pdfs, double * RESTRICT  _data_pdfs_tmp, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t const _stride_pdfs_tmp_0, int64_t const _stride_pdfs_tmp_1, int64_t const _stride_pdfs_tmp_2, int64_t const _stride_pdfs_tmp_3)
+{
+   const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
+   const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
+   const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
+
+   // Threads of the launch grid that fall outside the extents do nothing.
+   if (ctr_0 >= _size_pdfs_0 || ctr_1 >= _size_pdfs_1 || ctr_2 >= _size_pdfs_2)
+      return;
+
+   // Short aliases for the source strides.
+   const int64_t sx = _stride_pdfs_0;
+   const int64_t sy = _stride_pdfs_1;
+   const int64_t sz = _stride_pdfs_2;
+   const int64_t sq = _stride_pdfs_3;
+
+   // Element index of this cell's slot 0 in the source field.
+   const int64_t cell = sx*ctr_0 + sy*ctr_1 + sz*ctr_2;
+
+   // Gather phase: all 19 loads are issued before the first store.
+   const double in_0  = _data_pdfs[cell];
+   // Slots 1..6: neighbour shifted along a single axis.
+   const double in_1  = _data_pdfs[cell - sy + sq];
+   const double in_2  = _data_pdfs[cell + sy + 2*sq];
+   const double in_3  = _data_pdfs[cell + sx + 3*sq];
+   const double in_4  = _data_pdfs[cell - sx + 4*sq];
+   const double in_5  = _data_pdfs[cell - sz + 5*sq];
+   const double in_6  = _data_pdfs[cell + sz + 6*sq];
+   // Slots 7..18: neighbour shifted along two axes at once.
+   const double in_7  = _data_pdfs[cell + sx - sy + 7*sq];
+   const double in_8  = _data_pdfs[cell - sx - sy + 8*sq];
+   const double in_9  = _data_pdfs[cell + sx + sy + 9*sq];
+   const double in_10 = _data_pdfs[cell - sx + sy + 10*sq];
+   const double in_11 = _data_pdfs[cell - sy - sz + 11*sq];
+   const double in_12 = _data_pdfs[cell + sy - sz + 12*sq];
+   const double in_13 = _data_pdfs[cell + sx - sz + 13*sq];
+   const double in_14 = _data_pdfs[cell - sx - sz + 14*sq];
+   const double in_15 = _data_pdfs[cell - sy + sz + 15*sq];
+   const double in_16 = _data_pdfs[cell + sy + sz + 16*sq];
+   const double in_17 = _data_pdfs[cell + sx + sz + 17*sq];
+   const double in_18 = _data_pdfs[cell - sx + sz + 18*sq];
+
+   // Scatter phase: slot q of the same cell in the destination field.
+   const int64_t tq  = _stride_pdfs_tmp_3;
+   const int64_t out = _stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2;
+
+   _data_pdfs_tmp[out]         = in_0;
+   _data_pdfs_tmp[out + tq]    = in_1;
+   _data_pdfs_tmp[out + 2*tq]  = in_2;
+   _data_pdfs_tmp[out + 3*tq]  = in_3;
+   _data_pdfs_tmp[out + 4*tq]  = in_4;
+   _data_pdfs_tmp[out + 5*tq]  = in_5;
+   _data_pdfs_tmp[out + 6*tq]  = in_6;
+   _data_pdfs_tmp[out + 7*tq]  = in_7;
+   _data_pdfs_tmp[out + 8*tq]  = in_8;
+   _data_pdfs_tmp[out + 9*tq]  = in_9;
+   _data_pdfs_tmp[out + 10*tq] = in_10;
+   _data_pdfs_tmp[out + 11*tq] = in_11;
+   _data_pdfs_tmp[out + 12*tq] = in_12;
+   _data_pdfs_tmp[out + 13*tq] = in_13;
+   _data_pdfs_tmp[out + 14*tq] = in_14;
+   _data_pdfs_tmp[out + 15*tq] = in_15;
+   _data_pdfs_tmp[out + 16*tq] = in_16;
+   _data_pdfs_tmp[out + 17*tq] = in_17;
+   _data_pdfs_tmp[out + 18*tq] = in_18;
+}
+}
+
+
+namespace internal_sweepcollection_kernel_streamOnlyNoAdvancement {
+static FUNC_PREFIX __launch_bounds__(256) void sweepcollection_kernel_streamOnlyNoAdvancement(double * RESTRICT const _data_pdfs, double * RESTRICT  _data_pdfs_tmp, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t const _stride_pdfs_tmp_0, int64_t const _stride_pdfs_tmp_1, int64_t const _stride_pdfs_tmp_2, int64_t const _stride_pdfs_tmp_3)
+{
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
+   {
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
+      double * RESTRICT _data_pdfs_10_20_30 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2;
+      const double streamed_0 = _data_pdfs_10_20_30[_stride_pdfs_0*ctr_0];
+      double * RESTRICT _data_pdfs_1m1_20_31 = _data_pdfs + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
+      const double streamed_1 = _data_pdfs_1m1_20_31[_stride_pdfs_0*ctr_0];
+      double * RESTRICT _data_pdfs_11_20_32 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
+      const double streamed_2 = _data_pdfs_11_20_32[_stride_pdfs_0*ctr_0];
+      double * RESTRICT _data_pdfs_10_20_33 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
+      const double streamed_3 = _data_pdfs_10_20_33[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+      double * RESTRICT _data_pdfs_10_20_34 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
+      const double streamed_4 = _data_pdfs_10_20_34[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+      double * RESTRICT _data_pdfs_10_2m1_35 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3;
+      const double streamed_5 = _data_pdfs_10_2m1_35[_stride_pdfs_0*ctr_0];
+      double * RESTRICT _data_pdfs_10_21_36 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3;
+      const double streamed_6 = _data_pdfs_10_21_36[_stride_pdfs_0*ctr_0];
+      double * RESTRICT _data_pdfs_1m1_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
+      const double streamed_7 = _data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+      double * RESTRICT _data_pdfs_1m1_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
+      const double streamed_8 = _data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+      double * RESTRICT _data_pdfs_11_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
+      const double streamed_9 = _data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+      double * RESTRICT _data_pdfs_11_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
+      const double streamed_10 = _data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+      double * RESTRICT _data_pdfs_1m1_2m1_311 = _data_pdfs + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3;
+      const double streamed_11 = _data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0];
+      double * RESTRICT _data_pdfs_11_2m1_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3;
+      const double streamed_12 = _data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0];
+      double * RESTRICT _data_pdfs_10_2m1_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3;
+      const double streamed_13 = _data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+      double * RESTRICT _data_pdfs_10_2m1_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3;
+      const double streamed_14 = _data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+      double * RESTRICT _data_pdfs_1m1_21_315 = _data_pdfs + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3;
+      const double streamed_15 = _data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0];
+      double * RESTRICT _data_pdfs_11_21_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3;
+      const double streamed_16 = _data_pdfs_11_21_316[_stride_pdfs_0*ctr_0];
+      double * RESTRICT _data_pdfs_10_21_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3;
+      const double streamed_17 = _data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+      double * RESTRICT _data_pdfs_10_21_318 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3;
+      const double streamed_18 = _data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+      double * RESTRICT  _data_pdfs_tmp_10_20_30 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2;
+      _data_pdfs_tmp_10_20_30[_stride_pdfs_tmp_0*ctr_0] = streamed_0;
+      double * RESTRICT  _data_pdfs_tmp_10_20_31 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_31[_stride_pdfs_tmp_0*ctr_0] = streamed_1;
+      double * RESTRICT  _data_pdfs_tmp_10_20_32 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_32[_stride_pdfs_tmp_0*ctr_0] = streamed_2;
+      double * RESTRICT  _data_pdfs_tmp_10_20_33 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_33[_stride_pdfs_tmp_0*ctr_0] = streamed_3;
+      double * RESTRICT  _data_pdfs_tmp_10_20_34 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_34[_stride_pdfs_tmp_0*ctr_0] = streamed_4;
+      double * RESTRICT  _data_pdfs_tmp_10_20_35 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_35[_stride_pdfs_tmp_0*ctr_0] = streamed_5;
+      double * RESTRICT  _data_pdfs_tmp_10_20_36 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_36[_stride_pdfs_tmp_0*ctr_0] = streamed_6;
+      double * RESTRICT  _data_pdfs_tmp_10_20_37 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_37[_stride_pdfs_tmp_0*ctr_0] = streamed_7;
+      double * RESTRICT  _data_pdfs_tmp_10_20_38 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_38[_stride_pdfs_tmp_0*ctr_0] = streamed_8;
+      double * RESTRICT  _data_pdfs_tmp_10_20_39 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_39[_stride_pdfs_tmp_0*ctr_0] = streamed_9;
+      double * RESTRICT  _data_pdfs_tmp_10_20_310 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_310[_stride_pdfs_tmp_0*ctr_0] = streamed_10;
+      double * RESTRICT  _data_pdfs_tmp_10_20_311 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_311[_stride_pdfs_tmp_0*ctr_0] = streamed_11;
+      double * RESTRICT  _data_pdfs_tmp_10_20_312 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_312[_stride_pdfs_tmp_0*ctr_0] = streamed_12;
+      double * RESTRICT  _data_pdfs_tmp_10_20_313 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_313[_stride_pdfs_tmp_0*ctr_0] = streamed_13;
+      double * RESTRICT  _data_pdfs_tmp_10_20_314 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_314[_stride_pdfs_tmp_0*ctr_0] = streamed_14;
+      double * RESTRICT  _data_pdfs_tmp_10_20_315 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_315[_stride_pdfs_tmp_0*ctr_0] = streamed_15;
+      double * RESTRICT  _data_pdfs_tmp_10_20_316 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_316[_stride_pdfs_tmp_0*ctr_0] = streamed_16;
+      double * RESTRICT  _data_pdfs_tmp_10_20_317 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_317[_stride_pdfs_tmp_0*ctr_0] = streamed_17;
+      double * RESTRICT  _data_pdfs_tmp_10_20_318 = _data_pdfs_tmp + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3;
+      _data_pdfs_tmp_10_20_318[_stride_pdfs_tmp_0*ctr_0] = streamed_18;
+   } 
+}
+}
+
+
+namespace internal_sweepcollection_kernel_initialise { // Kernel that fills all 19 PDF components of each cell from the three components of the velocity field, with the density fixed to 1.0.
+static FUNC_PREFIX __launch_bounds__(256) void sweepcollection_kernel_initialise(double * RESTRICT  _data_pdfs, double * RESTRICT const _data_vel, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t const _stride_vel_0, int64_t const _stride_vel_1, int64_t const _stride_vel_2, int64_t const _stride_vel_3)
+{ // NOTE(review): the expressions below look like a second-order lattice-Boltzmann equilibrium on a 19-velocity stencil - confirm against the generator script.
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
+   { // Only threads whose global index lies inside the _size_pdfs_0/1/2 extents do any work; each such thread handles exactly one cell.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index, used as cell index along dimension 0
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index, used as cell index along dimension 1
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index, used as cell index along dimension 2
+      const double rho = 1.0; // density is hard-coded; it is not read from any field
+      double * RESTRICT _data_vel_10_20_30 = _data_vel + _stride_vel_1*ctr_1 + _stride_vel_2*ctr_2;
+      const double u_0 = _data_vel_10_20_30[_stride_vel_0*ctr_0]; // velocity component 0 of this cell
+      double * RESTRICT _data_vel_10_20_31 = _data_vel + _stride_vel_1*ctr_1 + _stride_vel_2*ctr_2 + _stride_vel_3;
+      const double u_1 = _data_vel_10_20_31[_stride_vel_0*ctr_0]; // velocity component 1 of this cell
+      double * RESTRICT _data_vel_10_20_32 = _data_vel + _stride_vel_1*ctr_1 + _stride_vel_2*ctr_2 + 2*_stride_vel_3;
+      const double u_2 = _data_vel_10_20_32[_stride_vel_0*ctr_0]; // velocity component 2 of this cell
+      double * RESTRICT  _data_pdfs_10_20_30 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2; // component 0: factor 1/3 on rho, only quadratic velocity terms
+      _data_pdfs_10_20_30[_stride_pdfs_0*ctr_0] = rho*0.33333333333333331 - 0.5*(u_0*u_0) - 0.5*(u_1*u_1) - 0.5*(u_2*u_2);
+      double * RESTRICT  _data_pdfs_10_20_31 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3; // components 1-6: factor 1/18 on rho and a linear term in exactly one velocity component
+      _data_pdfs_10_20_31[_stride_pdfs_0*ctr_0] = rho*0.055555555555555552 + u_1*0.16666666666666666 - 0.083333333333333329*(u_0*u_0) - 0.083333333333333329*(u_2*u_2) + 0.16666666666666666*(u_1*u_1);
+      double * RESTRICT  _data_pdfs_10_20_32 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
+      _data_pdfs_10_20_32[_stride_pdfs_0*ctr_0] = rho*0.055555555555555552 + u_1*-0.16666666666666666 - 0.083333333333333329*(u_0*u_0) - 0.083333333333333329*(u_2*u_2) + 0.16666666666666666*(u_1*u_1);
+      double * RESTRICT  _data_pdfs_10_20_33 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
+      _data_pdfs_10_20_33[_stride_pdfs_0*ctr_0] = rho*0.055555555555555552 + u_0*-0.16666666666666666 - 0.083333333333333329*(u_1*u_1) - 0.083333333333333329*(u_2*u_2) + 0.16666666666666666*(u_0*u_0);
+      double * RESTRICT  _data_pdfs_10_20_34 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
+      _data_pdfs_10_20_34[_stride_pdfs_0*ctr_0] = rho*0.055555555555555552 + u_0*0.16666666666666666 - 0.083333333333333329*(u_1*u_1) - 0.083333333333333329*(u_2*u_2) + 0.16666666666666666*(u_0*u_0);
+      double * RESTRICT  _data_pdfs_10_20_35 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
+      _data_pdfs_10_20_35[_stride_pdfs_0*ctr_0] = rho*0.055555555555555552 + u_2*0.16666666666666666 - 0.083333333333333329*(u_0*u_0) - 0.083333333333333329*(u_1*u_1) + 0.16666666666666666*(u_2*u_2);
+      double * RESTRICT  _data_pdfs_10_20_36 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3;
+      _data_pdfs_10_20_36[_stride_pdfs_0*ctr_0] = rho*0.055555555555555552 + u_2*-0.16666666666666666 - 0.083333333333333329*(u_0*u_0) - 0.083333333333333329*(u_1*u_1) + 0.16666666666666666*(u_2*u_2);
+      double * RESTRICT  _data_pdfs_10_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3; // components 7-18: factor 1/36 on rho, linear terms in two velocity components plus their mixed product
+      _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0] = rho*0.027777777777777776 + u_0*u_1*-0.25 + u_0*-0.083333333333333329 + u_1*0.083333333333333329 - 0.041666666666666664*(u_2*u_2) + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_1*u_1);
+      double * RESTRICT  _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
+      _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0] = rho*0.027777777777777776 + u_0*u_1*0.25 + u_0*0.083333333333333329 + u_1*0.083333333333333329 - 0.041666666666666664*(u_2*u_2) + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_1*u_1);
+      double * RESTRICT  _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
+      _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0] = rho*0.027777777777777776 + u_0*u_1*0.25 + u_0*-0.083333333333333329 + u_1*-0.083333333333333329 - 0.041666666666666664*(u_2*u_2) + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_1*u_1);
+      double * RESTRICT  _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
+      _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0] = rho*0.027777777777777776 + u_0*u_1*-0.25 + u_0*0.083333333333333329 + u_1*-0.083333333333333329 - 0.041666666666666664*(u_2*u_2) + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_1*u_1);
+      double * RESTRICT  _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
+      _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0] = rho*0.027777777777777776 + u_1*u_2*0.25 + u_1*0.083333333333333329 + u_2*0.083333333333333329 - 0.041666666666666664*(u_0*u_0) + 0.083333333333333329*(u_1*u_1) + 0.083333333333333329*(u_2*u_2);
+      double * RESTRICT  _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
+      _data_pdfs_10_20_312[_stride_pdfs_0*ctr_0] = rho*0.027777777777777776 + u_1*u_2*-0.25 + u_1*-0.083333333333333329 + u_2*0.083333333333333329 - 0.041666666666666664*(u_0*u_0) + 0.083333333333333329*(u_1*u_1) + 0.083333333333333329*(u_2*u_2);
+      double * RESTRICT  _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
+      _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0] = rho*0.027777777777777776 + u_0*u_2*-0.25 + u_0*-0.083333333333333329 + u_2*0.083333333333333329 - 0.041666666666666664*(u_1*u_1) + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_2*u_2);
+      double * RESTRICT  _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
+      _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0] = rho*0.027777777777777776 + u_0*u_2*0.25 + u_0*0.083333333333333329 + u_2*0.083333333333333329 - 0.041666666666666664*(u_1*u_1) + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_2*u_2);
+      double * RESTRICT  _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
+      _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0] = rho*0.027777777777777776 + u_1*u_2*-0.25 + u_1*0.083333333333333329 + u_2*-0.083333333333333329 - 0.041666666666666664*(u_0*u_0) + 0.083333333333333329*(u_1*u_1) + 0.083333333333333329*(u_2*u_2);
+      double * RESTRICT  _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
+      _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0] = rho*0.027777777777777776 + u_1*u_2*0.25 + u_1*-0.083333333333333329 + u_2*-0.083333333333333329 - 0.041666666666666664*(u_0*u_0) + 0.083333333333333329*(u_1*u_1) + 0.083333333333333329*(u_2*u_2);
+      double * RESTRICT  _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
+      _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0] = rho*0.027777777777777776 + u_0*u_2*0.25 + u_0*-0.083333333333333329 + u_2*-0.083333333333333329 - 0.041666666666666664*(u_1*u_1) + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_2*u_2);
+      double * RESTRICT  _data_pdfs_10_20_318 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
+      _data_pdfs_10_20_318[_stride_pdfs_0*ctr_0] = rho*0.027777777777777776 + u_0*u_2*-0.25 + u_0*0.083333333333333329 + u_2*-0.083333333333333329 - 0.041666666666666664*(u_1*u_1) + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_2*u_2);
+   } // end of in-bounds guard
+}
+}
+
+
+namespace internal_sweepcollection_kernel_getter { // Kernel that reduces the PDF components of each cell to three signed sums and stores them in the velocity field.
+static FUNC_PREFIX __launch_bounds__(256) void sweepcollection_kernel_getter(double * RESTRICT const _data_pdfs, double * RESTRICT  _data_vel, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t const _stride_vel_0, int64_t const _stride_vel_1, int64_t const _stride_vel_2, int64_t const _stride_vel_3)
+{ // Each in-bounds thread reads PDF components 1-18 of one cell (component 0 is never read) and writes velocity components 0-2 of the same cell.
+   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
+   { // Threads whose global index lies outside the _size_pdfs_0/1/2 extents do nothing.
+      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; // global thread index, used as cell index along dimension 0
+      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; // global thread index, used as cell index along dimension 1
+      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; // global thread index, used as cell index along dimension 2
+      double * RESTRICT _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_10_20_318 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_10_20_34 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
+      const double vel0Term = _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0] + _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0] + _data_pdfs_10_20_318[_stride_pdfs_0*ctr_0] + _data_pdfs_10_20_34[_stride_pdfs_0*ctr_0] + _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0]; // sum of components 4, 8, 10, 14, 18
+      double * RESTRICT _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_10_20_33 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_10_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
+      const double momdensity_0 = vel0Term - 1.0*_data_pdfs_10_20_313[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_10_20_317[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_10_20_33[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_10_20_37[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_10_20_39[_stride_pdfs_0*ctr_0]; // vel0Term minus components 3, 7, 9, 13, 17
+      double * RESTRICT _data_pdfs_10_20_31 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
+      double * RESTRICT _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
+      const double vel1Term = _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0] + _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0] + _data_pdfs_10_20_31[_stride_pdfs_0*ctr_0] + _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0]; // partial sum of components 1, 7, 11, 15
+      double * RESTRICT _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_10_20_32 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
+      const double momdensity_1 = vel1Term - 1.0*_data_pdfs_10_20_310[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_10_20_312[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_10_20_316[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_10_20_32[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_10_20_39[_stride_pdfs_0*ctr_0] + _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0]; // vel1Term plus component 8, minus components 2, 9, 10, 12, 16
+      double * RESTRICT _data_pdfs_10_20_35 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
+      const double vel2Term = _data_pdfs_10_20_312[_stride_pdfs_0*ctr_0] + _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0] + _data_pdfs_10_20_35[_stride_pdfs_0*ctr_0]; // partial sum of components 5, 12, 13
+      double * RESTRICT _data_pdfs_10_20_36 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3;
+      const double momdensity_2 = vel2Term - 1.0*_data_pdfs_10_20_315[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_10_20_316[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_10_20_317[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_10_20_318[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_10_20_36[_stride_pdfs_0*ctr_0] + _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0] + _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0]; // vel2Term plus components 11, 14, minus components 6, 15, 16, 17, 18
+      const double u_0 = momdensity_0; // the signed sums are used as-is; no division by a density appears in this kernel
+      const double u_1 = momdensity_1;
+      const double u_2 = momdensity_2;
+      double * RESTRICT  _data_vel_10_20_30 = _data_vel + _stride_vel_1*ctr_1 + _stride_vel_2*ctr_2;
+      _data_vel_10_20_30[_stride_vel_0*ctr_0] = u_0; // store velocity component 0
+      double * RESTRICT  _data_vel_10_20_31 = _data_vel + _stride_vel_1*ctr_1 + _stride_vel_2*ctr_2 + _stride_vel_3;
+      _data_vel_10_20_31[_stride_vel_0*ctr_0] = u_1; // store velocity component 1
+      double * RESTRICT  _data_vel_10_20_32 = _data_vel + _stride_vel_1*ctr_1 + _stride_vel_2*ctr_2 + 2*_stride_vel_3;
+      _data_vel_10_20_32[_stride_vel_0*ctr_0] = u_2; // store velocity component 2
+   } // end of in-bounds guard
+}
+}
+
+
+
+
+
+void SweepCollection::streamCollide( gpu::GPUField<double> * pdfs, gpu::GPUField<double> * pdfs_tmp, gpu::GPUField<double> * vel, double omega, const cell_idx_t ghost_layers, gpuStream_t stream )
+{ // Host-side launcher: collects raw pointers, iteration extents and strides from the three GPU fields and starts the streamCollide kernel on `stream`.
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(pdfs->nrOfGhostLayers()))
+   double * RESTRICT const _data_pdfs = pdfs->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0); // base pointer taken at cell (-ghost_layers, -ghost_layers, -ghost_layers), f index 0
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(pdfs_tmp->nrOfGhostLayers()))
+   double * RESTRICT  _data_pdfs_tmp = pdfs_tmp->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0); // same starting cell in the pdfs_tmp field
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(vel->nrOfGhostLayers()))
+   double * RESTRICT  _data_vel = vel->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0); // same starting cell in the velocity field
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(pdfs->xSize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_0 = int64_t(int64_c(pdfs->xSize()) + 2*ghost_layers); // iteration extent in x: xSize() plus ghost_layers on both sides
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(pdfs->ySize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(pdfs->ySize()) + 2*ghost_layers); // iteration extent in y
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(pdfs->zSize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(pdfs->zSize()) + 2*ghost_layers); // iteration extent in z
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // stride along the f index of pdfs
+   const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride());
+   const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
+   const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
+   const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride())); // stride along the f index of pdfs_tmp
+   const int64_t _stride_vel_0 = int64_t(vel->xStride());
+   const int64_t _stride_vel_1 = int64_t(vel->yStride());
+   const int64_t _stride_vel_2 = int64_t(vel->zStride());
+   const int64_t _stride_vel_3 = int64_t(1 * int64_t(vel->fStride())); // stride along the f index of vel
+   dim3 _block(uint32_c(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))))); // block extents: bx = min(16, size0); by = min(1024, size1, 16*(16/bx)); bz = min(64, size2, 256/(bx*min(size1, 16*(16/bx)))); for positive extents bx*by*bz never exceeds 256
+   dim3 _grid(uint32_c(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), uint32_c(( (_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) ) +1 )), uint32_c(( (_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 
16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 
16 : _size_pdfs_0))))))))) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))))) ) +1 ))); // grid extents: each iteration extent divided by the matching block extent, rounded up
+   internal_sweepcollection_kernel_streamCollide::sweepcollection_kernel_streamCollide<<<_grid, _block, 0, stream>>>(_data_pdfs, _data_pdfs_tmp, _data_vel, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3, _stride_vel_0, _stride_vel_1, _stride_vel_2, _stride_vel_3, omega); // launched on `stream` with 0 bytes of dynamic shared memory; omega is passed through to the kernel unchanged
+}
+void SweepCollection::streamCollideCellInterval( gpu::GPUField<double> * pdfs, gpu::GPUField<double> * pdfs_tmp, gpu::GPUField<double> * vel, double omega, const CellInterval & ci, gpuStream_t stream)
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   double * RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs_tmp->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs_tmp->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs_tmp->nrOfGhostLayers()))
+   double * RESTRICT  _data_pdfs_tmp = pdfs_tmp->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(vel->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(vel->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(vel->nrOfGhostLayers()))
+   double * RESTRICT  _data_vel = vel->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+   const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride());
+   const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
+   const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
+   const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
+   const int64_t _stride_vel_0 = int64_t(vel->xStride());
+   const int64_t _stride_vel_1 = int64_t(vel->yStride());
+   const int64_t _stride_vel_2 = int64_t(vel->zStride());
+   const int64_t _stride_vel_3 = int64_t(1 * int64_t(vel->fStride()));
+   dim3 _block(uint32_c(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))))));
+   dim3 _grid(uint32_c(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), uint32_c(( (_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) ) +1 )), uint32_c(( (_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 
16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 
16 : _size_pdfs_0))))))))) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))))) ) +1 )));
+   internal_sweepcollection_kernel_streamCollide::sweepcollection_kernel_streamCollide<<<_grid, _block, 0, stream>>>(_data_pdfs, _data_pdfs_tmp, _data_vel, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3, _stride_vel_0, _stride_vel_1, _stride_vel_2, _stride_vel_3, omega);
+}
+
+void SweepCollection::collide( gpu::GPUField<double> * pdfs, gpu::GPUField<double> * vel, double omega, const cell_idx_t ghost_layers, gpuStream_t stream ) // Host-side launcher: runs sweepcollection_kernel_collide on `stream` over (xSize + 2*ghost_layers) x (ySize + 2*ghost_layers) x (zSize + 2*ghost_layers) cells of pdfs; omega is forwarded unchanged to the kernel.
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(pdfs->nrOfGhostLayers())) // ghost_layers must not exceed the number of ghost layers pdfs reports
+   double * RESTRICT  _data_pdfs = pdfs->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0); // address of cell (-ghost_layers, -ghost_layers, -ghost_layers), f = 0: first cell handed to the kernel
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(vel->nrOfGhostLayers())) // same ghost layer check for vel
+   double * RESTRICT  _data_vel = vel->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0); // vel pointer at the same cell coordinates as _data_pdfs
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(pdfs->xSize()) + 2*ghost_layers)) // requested x extent must fit into pdfs' x size including ghost layers
+   const int64_t _size_pdfs_0 = int64_t(int64_c(pdfs->xSize()) + 2*ghost_layers); // x extent handed to the kernel
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(pdfs->ySize()) + 2*ghost_layers)) // same check for y
+   const int64_t _size_pdfs_1 = int64_t(int64_c(pdfs->ySize()) + 2*ghost_layers); // y extent handed to the kernel
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(pdfs->zSize()) + 2*ghost_layers)) // same check for z
+   const int64_t _size_pdfs_2 = int64_t(int64_c(pdfs->zSize()) + 2*ghost_layers); // z extent handed to the kernel; all extents come from pdfs, vel's sizes are not compared here
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // pdfs strides for x, y, z and f, forwarded to the kernel
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // the "1 *" factor does not change the value
+   const int64_t _stride_vel_0 = int64_t(vel->xStride()); // vel strides for x, y, z and f, forwarded to the kernel
+   const int64_t _stride_vel_1 = int64_t(vel->yStride());
+   const int64_t _stride_vel_2 = int64_t(vel->zStride());
+   const int64_t _stride_vel_3 = int64_t(1 * int64_t(vel->fStride())); // the "1 *" factor does not change the value
+   dim3 _block(uint32_c(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))))); // threads per block: x = min(16, _size_pdfs_0); y = min(1024, _size_pdfs_1, 16*(16/x)); z = min(64, _size_pdfs_2, 256/(x*min(_size_pdfs_1, 16*(16/x)))). NOTE(review): an extent of 0 on any axis leads to a division or modulo by zero in the _block/_grid expressions - confirm callers never pass an empty region.
+   dim3 _grid(uint32_c(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), uint32_c(( (_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) ) +1 )), uint32_c(( (_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 
16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 
16 : _size_pdfs_0))))))))) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))))) ) +1 ))); // blocks per axis: extent divided by the matching _block dimension, plus one when the division leaves a remainder
+   internal_sweepcollection_kernel_collide::sweepcollection_kernel_collide<<<_grid, _block, 0, stream>>>(_data_pdfs, _data_vel, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_vel_0, _stride_vel_1, _stride_vel_2, _stride_vel_3, omega); // launch on `stream` with 0 as the third launch argument (dynamic shared memory size); no launch error is checked in this function
+}
+void SweepCollection::collideCellInterval( gpu::GPUField<double> * pdfs, gpu::GPUField<double> * vel, double omega, const CellInterval & ci, gpuStream_t stream) // Host-side launcher: runs sweepcollection_kernel_collide on `stream` over ci.xSize() x ci.ySize() x ci.zSize() cells starting at the min corner of ci; omega is forwarded unchanged to the kernel.
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // min corner of ci must not lie below pdfs' ghost layer range; only the min corner is asserted here
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   double * RESTRICT  _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // address of the min-corner cell of ci, f = 0: first cell handed to the kernel
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(vel->nrOfGhostLayers())) // same min-corner checks against vel's ghost layers
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(vel->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(vel->nrOfGhostLayers()))
+   double * RESTRICT  _data_vel = vel->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // vel pointer at the same cell coordinates as _data_pdfs
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // x extent of ci must fit into pdfs' x size including ghost layers
+   const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // x extent handed to the kernel; the "+ 0" adds nothing to the interval size
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) // same check for y
+   const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // y extent handed to the kernel
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) // same check for z
+   const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // z extent handed to the kernel; vel's sizes are not compared against ci here
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // pdfs strides for x, y, z and f, forwarded to the kernel
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // the "1 *" factor does not change the value
+   const int64_t _stride_vel_0 = int64_t(vel->xStride()); // vel strides for x, y, z and f, forwarded to the kernel
+   const int64_t _stride_vel_1 = int64_t(vel->yStride());
+   const int64_t _stride_vel_2 = int64_t(vel->zStride());
+   const int64_t _stride_vel_3 = int64_t(1 * int64_t(vel->fStride())); // the "1 *" factor does not change the value
+   dim3 _block(uint32_c(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))))); // threads per block: x = min(16, _size_pdfs_0); y = min(1024, _size_pdfs_1, 16*(16/x)); z = min(64, _size_pdfs_2, 256/(x*min(_size_pdfs_1, 16*(16/x)))). NOTE(review): an extent of 0 on any axis leads to a division or modulo by zero in the _block/_grid expressions - confirm callers never pass an empty interval.
+   dim3 _grid(uint32_c(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), uint32_c(( (_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) ) +1 )), uint32_c(( (_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 
16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 
16 : _size_pdfs_0))))))))) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))))) ) +1 ))); // blocks per axis: extent divided by the matching _block dimension, plus one when the division leaves a remainder
+   internal_sweepcollection_kernel_collide::sweepcollection_kernel_collide<<<_grid, _block, 0, stream>>>(_data_pdfs, _data_vel, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_vel_0, _stride_vel_1, _stride_vel_2, _stride_vel_3, omega); // launch on `stream` with 0 as the third launch argument (dynamic shared memory size); no launch error is checked in this function
+}
+
+void SweepCollection::stream( gpu::GPUField<double> * pdfs, gpu::GPUField<double> * pdfs_tmp, const cell_idx_t ghost_layers, gpuStream_t stream ) // Host-side launcher: runs sweepcollection_kernel_stream on the gpuStream_t `stream` over (xSize + 2*ghost_layers) x (ySize + 2*ghost_layers) x (zSize + 2*ghost_layers) cells; inside this body `stream` names the parameter, not this method.
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(pdfs->nrOfGhostLayers())) // ghost_layers must not exceed the number of ghost layers pdfs reports
+   double * RESTRICT const _data_pdfs = pdfs->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0); // address of cell (-ghost_layers, -ghost_layers, -ghost_layers), f = 0; this pointer variable is const, unlike _data_pdfs_tmp - presumably pdfs is the kernel's input and pdfs_tmp its output, kernel body not visible here, confirm
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(pdfs_tmp->nrOfGhostLayers())) // same ghost layer check for pdfs_tmp
+   double * RESTRICT  _data_pdfs_tmp = pdfs_tmp->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0); // pdfs_tmp pointer at the same cell coordinates as _data_pdfs
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(pdfs->xSize()) + 2*ghost_layers)) // requested x extent must fit into pdfs' x size including ghost layers
+   const int64_t _size_pdfs_0 = int64_t(int64_c(pdfs->xSize()) + 2*ghost_layers); // x extent handed to the kernel
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(pdfs->ySize()) + 2*ghost_layers)) // same check for y
+   const int64_t _size_pdfs_1 = int64_t(int64_c(pdfs->ySize()) + 2*ghost_layers); // y extent handed to the kernel
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(pdfs->zSize()) + 2*ghost_layers)) // same check for z
+   const int64_t _size_pdfs_2 = int64_t(int64_c(pdfs->zSize()) + 2*ghost_layers); // z extent handed to the kernel; all extents come from pdfs, pdfs_tmp's sizes are not compared here
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // pdfs strides for x, y, z and f, forwarded to the kernel
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // the "1 *" factor does not change the value
+   const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride()); // pdfs_tmp strides are queried separately from pdfs and forwarded to the kernel
+   const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
+   const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
+   const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride())); // the "1 *" factor does not change the value
+   dim3 _block(uint32_c(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))))); // threads per block: x = min(16, _size_pdfs_0); y = min(1024, _size_pdfs_1, 16*(16/x)); z = min(64, _size_pdfs_2, 256/(x*min(_size_pdfs_1, 16*(16/x)))). NOTE(review): an extent of 0 on any axis leads to a division or modulo by zero in the _block/_grid expressions - confirm callers never pass an empty region.
+   dim3 _grid(uint32_c(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), uint32_c(( (_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) ) +1 )), uint32_c(( (_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 
16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 
16 : _size_pdfs_0))))))))) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))))) ) +1 ))); // blocks per axis: extent divided by the matching _block dimension, plus one when the division leaves a remainder
+   internal_sweepcollection_kernel_stream::sweepcollection_kernel_stream<<<_grid, _block, 0, stream>>>(_data_pdfs, _data_pdfs_tmp, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3); // launch on the `stream` parameter with 0 as the third launch argument (dynamic shared memory size); no launch error is checked in this function
+}
+void SweepCollection::streamCellInterval( gpu::GPUField<double> * pdfs, gpu::GPUField<double> * pdfs_tmp, const CellInterval & ci, gpuStream_t stream) // Launches sweepcollection_kernel_stream on `stream` for the cells of `ci`, passing data pointers, extents and strides of pdfs and pdfs_tmp.
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // the lower corner of ci must be >= -(ghost layer count of pdfs) on every axis
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   double * RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // address of ci's minimum cell at f index 0; `const` qualifies the pointer itself, not the doubles
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs_tmp->nrOfGhostLayers())) // same lower-corner check against pdfs_tmp
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs_tmp->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs_tmp->nrOfGhostLayers()))
+   double * RESTRICT  _data_pdfs_tmp = pdfs_tmp->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // matching start address inside pdfs_tmp
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // the interval extent must fit into the field extent including ghost layers
+   const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // x extent handed to the kernel
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // y extent handed to the kernel
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // z extent handed to the kernel
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // x/y/z/f strides of pdfs as reported by the field
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride()); // x/y/z/f strides of pdfs_tmp; passed separately from those of pdfs
+   const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
+   const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
+   const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
+   dim3 _block(uint32_c(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))))); // block dims (integer division): x = min(16, _size_pdfs_0); y = min(1024, _size_pdfs_1, 16*(16/x)); z = min(64, _size_pdfs_2, 256/(x*min(_size_pdfs_1, 16*(16/x)))). NOTE(review): a zero extent makes a divisor here or in _grid zero - confirm callers never pass an empty interval
+   dim3 _grid(uint32_c(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), uint32_c(( (_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) ) +1 )), uint32_c(( (_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 
16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 
16 : _size_pdfs_0))))))))) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))))) ) +1 ))); // grid dims: per axis, extent divided by the matching block extent, plus one when the remainder is non-zero (round-up division)
+   internal_sweepcollection_kernel_stream::sweepcollection_kernel_stream<<<_grid, _block, 0, stream>>>(_data_pdfs, _data_pdfs_tmp, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3); // launch on the caller's stream; third launch argument 0 = no dynamic shared memory
+}
+
+void SweepCollection::streamOnlyNoAdvancement( gpu::GPUField<double> * pdfs, gpu::GPUField<double> * pdfs_tmp, const cell_idx_t ghost_layers, gpuStream_t stream ) // Launches sweepcollection_kernel_streamOnlyNoAdvancement on `stream` over the field interior widened by `ghost_layers` cells on every side.
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(pdfs->nrOfGhostLayers())) // i.e. ghost_layers must not exceed the ghost layer count of pdfs
+   double * RESTRICT const _data_pdfs = pdfs->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0); // address of cell (-ghost_layers, -ghost_layers, -ghost_layers) at f index 0; `const` qualifies the pointer itself, not the doubles
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(pdfs_tmp->nrOfGhostLayers())) // same limit checked against pdfs_tmp
+   double * RESTRICT  _data_pdfs_tmp = pdfs_tmp->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0); // matching start address inside pdfs_tmp
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(pdfs->xSize()) + 2*ghost_layers)) // the widened extent must fit into the field extent including ghost layers
+   const int64_t _size_pdfs_0 = int64_t(int64_c(pdfs->xSize()) + 2*ghost_layers); // x extent handed to the kernel: interior size plus ghost_layers on both sides
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(pdfs->ySize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(pdfs->ySize()) + 2*ghost_layers); // y extent handed to the kernel
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(pdfs->zSize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(pdfs->zSize()) + 2*ghost_layers); // z extent handed to the kernel
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // x/y/z/f strides of pdfs as reported by the field
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride()); // x/y/z/f strides of pdfs_tmp; passed separately from those of pdfs
+   const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
+   const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
+   const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
+   dim3 _block(uint32_c(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))))); // block dims (integer division): x = min(16, _size_pdfs_0); y = min(1024, _size_pdfs_1, 16*(16/x)); z = min(64, _size_pdfs_2, 256/(x*min(_size_pdfs_1, 16*(16/x)))). NOTE(review): a zero extent makes a divisor here or in _grid zero - confirm the field is never empty along any axis
+   dim3 _grid(uint32_c(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), uint32_c(( (_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) ) +1 )), uint32_c(( (_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 
16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 
16 : _size_pdfs_0))))))))) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))))) ) +1 ))); // grid dims: per axis, extent divided by the matching block extent, plus one when the remainder is non-zero (round-up division)
+   internal_sweepcollection_kernel_streamOnlyNoAdvancement::sweepcollection_kernel_streamOnlyNoAdvancement<<<_grid, _block, 0, stream>>>(_data_pdfs, _data_pdfs_tmp, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3); // launch on the caller's stream; third launch argument 0 = no dynamic shared memory
+}
+void SweepCollection::streamOnlyNoAdvancementCellInterval( gpu::GPUField<double> * pdfs, gpu::GPUField<double> * pdfs_tmp, const CellInterval & ci, gpuStream_t stream) // Launches sweepcollection_kernel_streamOnlyNoAdvancement on `stream` for the cells of `ci`, passing data pointers, extents and strides of pdfs and pdfs_tmp.
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // the lower corner of ci must be >= -(ghost layer count of pdfs) on every axis
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   double * RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // address of ci's minimum cell at f index 0; `const` qualifies the pointer itself, not the doubles
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs_tmp->nrOfGhostLayers())) // same lower-corner check against pdfs_tmp
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs_tmp->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs_tmp->nrOfGhostLayers()))
+   double * RESTRICT  _data_pdfs_tmp = pdfs_tmp->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // matching start address inside pdfs_tmp
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // the interval extent must fit into the field extent including ghost layers
+   const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // x extent handed to the kernel
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // y extent handed to the kernel
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // z extent handed to the kernel
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // x/y/z/f strides of pdfs as reported by the field
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride()); // x/y/z/f strides of pdfs_tmp; passed separately from those of pdfs
+   const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
+   const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
+   const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
+   dim3 _block(uint32_c(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))))); // block dims (integer division): x = min(16, _size_pdfs_0); y = min(1024, _size_pdfs_1, 16*(16/x)); z = min(64, _size_pdfs_2, 256/(x*min(_size_pdfs_1, 16*(16/x)))). NOTE(review): a zero extent makes a divisor here or in _grid zero - confirm callers never pass an empty interval
+   dim3 _grid(uint32_c(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), uint32_c(( (_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) ) +1 )), uint32_c(( (_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 
16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 
16 : _size_pdfs_0))))))))) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))))) ) +1 ))); // grid dims: per axis, extent divided by the matching block extent, plus one when the remainder is non-zero (round-up division)
+   internal_sweepcollection_kernel_streamOnlyNoAdvancement::sweepcollection_kernel_streamOnlyNoAdvancement<<<_grid, _block, 0, stream>>>(_data_pdfs, _data_pdfs_tmp, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3); // launch on the caller's stream; third launch argument 0 = no dynamic shared memory
+}
+
+void SweepCollection::initialise( gpu::GPUField<double> * pdfs, gpu::GPUField<double> * vel, const cell_idx_t ghost_layers, gpuStream_t stream ) // Launches sweepcollection_kernel_initialise on `stream` over the pdfs interior widened by `ghost_layers` cells on every side, passing the pdfs and vel pointers and strides.
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(pdfs->nrOfGhostLayers())) // i.e. ghost_layers must not exceed the ghost layer count of pdfs
+   double * RESTRICT  _data_pdfs = pdfs->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0); // address of cell (-ghost_layers, -ghost_layers, -ghost_layers) at f index 0 of pdfs
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(vel->nrOfGhostLayers())) // same limit checked against vel
+   double * RESTRICT const _data_vel = vel->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0); // matching start address inside vel; `const` qualifies the pointer itself, not the doubles
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(pdfs->xSize()) + 2*ghost_layers)) // the widened extent must fit into the pdfs extent including ghost layers
+   const int64_t _size_pdfs_0 = int64_t(int64_c(pdfs->xSize()) + 2*ghost_layers); // x extent handed to the kernel: interior size plus ghost_layers on both sides
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(pdfs->ySize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(pdfs->ySize()) + 2*ghost_layers); // y extent handed to the kernel
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(pdfs->zSize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(pdfs->zSize()) + 2*ghost_layers); // z extent handed to the kernel; all extents are taken from pdfs, none from vel
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // x/y/z/f strides of pdfs as reported by the field
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   const int64_t _stride_vel_0 = int64_t(vel->xStride()); // x/y/z/f strides of vel; passed separately from those of pdfs
+   const int64_t _stride_vel_1 = int64_t(vel->yStride());
+   const int64_t _stride_vel_2 = int64_t(vel->zStride());
+   const int64_t _stride_vel_3 = int64_t(1 * int64_t(vel->fStride()));
+   dim3 _block(uint32_c(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))))); // block dims (integer division): x = min(16, _size_pdfs_0); y = min(1024, _size_pdfs_1, 16*(16/x)); z = min(64, _size_pdfs_2, 256/(x*min(_size_pdfs_1, 16*(16/x)))). NOTE(review): a zero extent makes a divisor here or in _grid zero - confirm the field is never empty along any axis
+   dim3 _grid(uint32_c(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), uint32_c(( (_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) ) +1 )), uint32_c(( (_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 
16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 
16 : _size_pdfs_0))))))))) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))))) ) +1 ))); // grid dims: per axis, extent divided by the matching block extent, plus one when the remainder is non-zero (round-up division)
+   internal_sweepcollection_kernel_initialise::sweepcollection_kernel_initialise<<<_grid, _block, 0, stream>>>(_data_pdfs, _data_vel, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_vel_0, _stride_vel_1, _stride_vel_2, _stride_vel_3); // launch on the caller's stream; third launch argument 0 = no dynamic shared memory
+}
+void SweepCollection::initialiseCellInterval( gpu::GPUField<double> * pdfs, gpu::GPUField<double> * vel, const CellInterval & ci, gpuStream_t stream)
+{ // Launches sweepcollection_kernel_initialise on `stream` for the cells covered by `ci`, handing it base pointers, extents and strides of `pdfs` and `vel`.
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // ci's lower corner must lie at or above -nrOfGhostLayers() of pdfs (x here, y and z on the next two lines)
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   double * RESTRICT  _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // base address: pdfs entry at ci's minimum corner, f index 0
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(vel->nrOfGhostLayers())) // same lower-corner checks, now against the ghost layers of vel
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(vel->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(vel->nrOfGhostLayers()))
+   double * RESTRICT const _data_vel = vel->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // base address: vel entry at ci's minimum corner, f index 0
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // the interval's x extent must fit into pdfs including ghost layers (y and z are checked the same way below)
+   const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // x extent passed to the kernel; NOTE(review): a zero extent would make the block-size divisor in the dim3 expressions below 0 - presumably callers never pass an empty interval, confirm
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // y extent passed to the kernel
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // z extent passed to the kernel; only pdfs extents are asserted, vel is not checked against ci's size here
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // strides are read from the fields and forwarded to the kernel unchanged
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   const int64_t _stride_vel_0 = int64_t(vel->xStride());
+   const int64_t _stride_vel_1 = int64_t(vel->yStride());
+   const int64_t _stride_vel_2 = int64_t(vel->zStride());
+   const int64_t _stride_vel_3 = int64_t(1 * int64_t(vel->fStride()));
+   dim3 _block(uint32_c(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))))); // threads per block: x = min(16, size0); y = min(1024, size1, 16*(16/x)); z = min(64, size2, 256/(x*min(size1, 16*(16/x)))), all integer division
+   dim3 _grid(uint32_c(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), uint32_c(( (_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) ) +1 )), uint32_c(( (_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 
16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 
16 : _size_pdfs_0))))))))) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))))) ) +1 ))); // blocks per axis: extent divided by the matching _block dimension, rounded up
+   internal_sweepcollection_kernel_initialise::sweepcollection_kernel_initialise<<<_grid, _block, 0, stream>>>(_data_pdfs, _data_vel, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_vel_0, _stride_vel_1, _stride_vel_2, _stride_vel_3); // third launch argument 0: no dynamic shared memory is requested; the launch is enqueued on `stream`
+}
+
+void SweepCollection::calculateMacroscopicParameters( gpu::GPUField<double> * pdfs, gpu::GPUField<double> * vel, const cell_idx_t ghost_layers, gpuStream_t stream )
+{ // Launches sweepcollection_kernel_getter on `stream` over the whole field extended by `ghost_layers` cells on every side, handing it base pointers, extents and strides of `pdfs` and `vel`.
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(pdfs->nrOfGhostLayers())) // ghost_layers must not exceed the ghost layers pdfs provides
+   double * RESTRICT const _data_pdfs = pdfs->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0); // base address: pdfs entry at (-ghost_layers, -ghost_layers, -ghost_layers), f index 0
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(vel->nrOfGhostLayers())) // same bound for vel
+   double * RESTRICT  _data_vel = vel->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0); // base address: vel entry at the same corner, f index 0
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(pdfs->xSize()) + 2*ghost_layers)) // the extended x extent must fit into pdfs including ghost layers (y and z are checked the same way below)
+   const int64_t _size_pdfs_0 = int64_t(int64_c(pdfs->xSize()) + 2*ghost_layers); // x extent passed to the kernel: interior size plus ghost_layers on both sides
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(pdfs->ySize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(pdfs->ySize()) + 2*ghost_layers); // y extent passed to the kernel
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(pdfs->zSize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(pdfs->zSize()) + 2*ghost_layers); // z extent passed to the kernel; all extents come from pdfs, vel's size is not checked here
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // strides are read from the fields and forwarded to the kernel unchanged
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   const int64_t _stride_vel_0 = int64_t(vel->xStride());
+   const int64_t _stride_vel_1 = int64_t(vel->yStride());
+   const int64_t _stride_vel_2 = int64_t(vel->zStride());
+   const int64_t _stride_vel_3 = int64_t(1 * int64_t(vel->fStride()));
+   dim3 _block(uint32_c(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))))); // threads per block: x = min(16, size0); y = min(1024, size1, 16*(16/x)); z = min(64, size2, 256/(x*min(size1, 16*(16/x)))), all integer division
+   dim3 _grid(uint32_c(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), uint32_c(( (_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) ) +1 )), uint32_c(( (_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 
16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 
16 : _size_pdfs_0))))))))) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))))) ) +1 ))); // blocks per axis: extent divided by the matching _block dimension, rounded up
+   internal_sweepcollection_kernel_getter::sweepcollection_kernel_getter<<<_grid, _block, 0, stream>>>(_data_pdfs, _data_vel, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_vel_0, _stride_vel_1, _stride_vel_2, _stride_vel_3); // third launch argument 0: no dynamic shared memory; NOTE(review): by its name the kernel presumably derives vel from pdfs - its body is defined elsewhere, confirm
+}
+void SweepCollection::calculateMacroscopicParametersCellInterval( gpu::GPUField<double> * pdfs, gpu::GPUField<double> * vel, const CellInterval & ci, gpuStream_t stream)
+{ // Launches sweepcollection_kernel_getter on `stream` for the cells covered by `ci`, handing it base pointers, extents and strides of `pdfs` and `vel`.
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // ci's lower corner must lie at or above -nrOfGhostLayers() of pdfs (x here, y and z on the next two lines)
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   double * RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // base address: pdfs entry at ci's minimum corner, f index 0
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(vel->nrOfGhostLayers())) // same lower-corner checks, now against the ghost layers of vel
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(vel->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(vel->nrOfGhostLayers()))
+   double * RESTRICT  _data_vel = vel->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // base address: vel entry at ci's minimum corner, f index 0
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // the interval's x extent must fit into pdfs including ghost layers (y and z are checked the same way below)
+   const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // x extent passed to the kernel; NOTE(review): a zero extent would make the block-size divisor in the dim3 expressions below 0 - presumably callers never pass an empty interval, confirm
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // y extent passed to the kernel
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // z extent passed to the kernel; only pdfs extents are asserted, vel is not checked against ci's size here
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // strides are read from the fields and forwarded to the kernel unchanged
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   const int64_t _stride_vel_0 = int64_t(vel->xStride());
+   const int64_t _stride_vel_1 = int64_t(vel->yStride());
+   const int64_t _stride_vel_2 = int64_t(vel->zStride());
+   const int64_t _stride_vel_3 = int64_t(1 * int64_t(vel->fStride()));
+   dim3 _block(uint32_c(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))))); // threads per block: x = min(16, size0); y = min(1024, size1, 16*(16/x)); z = min(64, size2, 256/(x*min(size1, 16*(16/x)))), all integer division
+   dim3 _grid(uint32_c(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), uint32_c(( (_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))) ) +1 )), uint32_c(( (_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 
16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 
16 : _size_pdfs_0))))))))) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)*((_size_pdfs_1 < 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)))) ? _size_pdfs_1 : 16*((int64_t)(16) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0))))))))) ) +1 ))); // blocks per axis: extent divided by the matching _block dimension, rounded up
+   internal_sweepcollection_kernel_getter::sweepcollection_kernel_getter<<<_grid, _block, 0, stream>>>(_data_pdfs, _data_vel, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_vel_0, _stride_vel_1, _stride_vel_2, _stride_vel_3); // third launch argument 0: no dynamic shared memory; NOTE(review): by its name the kernel presumably derives vel from pdfs - its body is defined elsewhere, confirm
+}
+
+
+
+} // namespace lbm
+} // namespace walberla
+
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic pop
+#endif
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_INTEL )
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/apps/benchmarks/GridRefSphere/SweepCollection.h b/apps/benchmarks/GridRefSphere/SweepCollection.h
new file mode 100644
index 000000000..0e9d41edc
--- /dev/null
+++ b/apps/benchmarks/GridRefSphere/SweepCollection.h
@@ -0,0 +1,1428 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file SweepCollection.h
+//! \author pystencils
+//======================================================================================================================
+
+#pragma once
+
+#include "core/DataTypes.h"
+#include "core/logging/Logging.h"
+#include "core/Macros.h"
+
+#include "gpu/GPUField.h"
+#include "gpu/ParallelStreams.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+
+#include "field/SwapableCompare.h"
+#include "field/GhostLayerField.h"
+
+#include <set>
+#include <cmath>
+
+
+
+using namespace std::placeholders;
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic push
+#   pragma GCC diagnostic ignored "-Wunused-parameter"
+#   pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace lbm {
+
+
+class SweepCollection
+{
+public:
+  enum Type { ALL = 0, INNER = 1, OUTER = 2 }; // selector for the Type-taking sweep getters: INNER dispatches to the *Inner sweep, OUTER to the *Outer sweep
+
+   SweepCollection(const shared_ptr< StructuredBlockStorage > & blocks, BlockDataID pdfsID_, BlockDataID velID_, double omega, const Cell & outerWidth=Cell(1, 1, 1))
+     : blocks_(blocks), pdfsID(pdfsID_), velID(velID_), outerWidth_(outerWidth)
+   { // Stores the block storage, the two field IDs and the outer width, fills omegaVector with one entry per refinement level, and aborts if any block is too small for outerWidth.
+      
+      for (uint_t level = 0; level < blocks->getNumberOfLevels(); level++) // one omegaVector entry per level, appended in ascending level order
+      {
+          const double level_scale_factor = double(uint_t(1) << level); // 2^level
+          const double one                = double(1.0);
+          const double half               = double(0.5);
+          
+          omegaVector.push_back( double(omega / (level_scale_factor * (-omega * half + one) + omega * half)) ); // omega / (2^level * (1 - omega/2) + omega/2); for level 0 this reduces to omega (up to rounding)
+      }
+
+
+      for (auto& iBlock : *blocks) // every block needs more than 2 * outerWidth_ cells along each axis
+      {
+         if (int_c(blocks->getNumberOfXCells(iBlock)) <= outerWidth_[0] * 2 ||
+             int_c(blocks->getNumberOfYCells(iBlock)) <= outerWidth_[1] * 2 ||
+             int_c(blocks->getNumberOfZCells(iBlock)) <= outerWidth_[2] * 2)
+          WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller or increase cellsPerBlock") // presumably ensures a non-empty inner region remains after removing the outer layers - confirm
+      }
+   };
+
+   
+    ~SweepCollection() {  // Deletes every pointer stored in cache_pdfs_.
+        for(auto p: cache_pdfs_) { // NOTE(review): raw owning pointers; no copy/move control is declared in the visible part of the class - confirm the object is never copied, otherwise these would be deleted twice
+            delete p;
+        }
+     }
+
+
+   /*************************************************************************************
+   *                Internal static functions on raw field pointers (declarations only)
+   *************************************************************************************/
+   static void streamCollide (gpu::GPUField<double> * pdfs, gpu::GPUField<double> * pdfs_tmp, gpu::GPUField<double> * vel, double omega, const cell_idx_t ghost_layers = 0, gpuStream_t stream = nullptr);
+   static void streamCollideCellInterval (gpu::GPUField<double> * pdfs, gpu::GPUField<double> * pdfs_tmp, gpu::GPUField<double> * vel, double omega, const CellInterval & ci, gpuStream_t stream = nullptr);
+   // Pattern for every pair: the first form takes ghost_layers (default 0), the second a CellInterval.
+   static void collide (gpu::GPUField<double> * pdfs, gpu::GPUField<double> * vel, double omega, const cell_idx_t ghost_layers = 0, gpuStream_t stream = nullptr);
+   static void collideCellInterval (gpu::GPUField<double> * pdfs, gpu::GPUField<double> * vel, double omega, const CellInterval & ci, gpuStream_t stream = nullptr);
+   // stream and streamOnlyNoAdvancement take pdfs and pdfs_tmp, but neither vel nor omega.
+   static void stream (gpu::GPUField<double> * pdfs, gpu::GPUField<double> * pdfs_tmp, const cell_idx_t ghost_layers = 0, gpuStream_t stream = nullptr);
+   static void streamCellInterval (gpu::GPUField<double> * pdfs, gpu::GPUField<double> * pdfs_tmp, const CellInterval & ci, gpuStream_t stream = nullptr);
+   // The trailing gpuStream_t parameter defaults to nullptr in all of these declarations.
+   static void streamOnlyNoAdvancement (gpu::GPUField<double> * pdfs, gpu::GPUField<double> * pdfs_tmp, const cell_idx_t ghost_layers = 0, gpuStream_t stream = nullptr);
+   static void streamOnlyNoAdvancementCellInterval (gpu::GPUField<double> * pdfs, gpu::GPUField<double> * pdfs_tmp, const CellInterval & ci, gpuStream_t stream = nullptr);
+   // initialise and calculateMacroscopicParameters take pdfs and vel, and no omega.
+   static void initialise (gpu::GPUField<double> * pdfs, gpu::GPUField<double> * vel, const cell_idx_t ghost_layers = 0, gpuStream_t stream = nullptr);
+   static void initialiseCellInterval (gpu::GPUField<double> * pdfs, gpu::GPUField<double> * vel, const CellInterval & ci, gpuStream_t stream = nullptr);
+   // All functions in this group are static, so no SweepCollection instance is needed to call them.
+   static void calculateMacroscopicParameters (gpu::GPUField<double> * pdfs, gpu::GPUField<double> * vel, const cell_idx_t ghost_layers = 0, gpuStream_t stream = nullptr);
+   static void calculateMacroscopicParametersCellInterval (gpu::GPUField<double> * pdfs, gpu::GPUField<double> * vel, const CellInterval & ci, gpuStream_t stream = nullptr);
+   
+
+   /*************************************************************************************
+   *                Function Definitions for external Usage
+   *************************************************************************************/
+
+   // Returns a callable that applies streamCollide(IBlock*) to its argument; it holds `this`.
+   std::function<void (IBlock *)> streamCollide() {
+      return [this](IBlock* blk) { this->streamCollide(blk); };
+   }
+
+   // Chooses the streamCollide callable by `type`: INNER -> streamCollideInner,
+   // OUTER -> streamCollideOuter, any other value -> streamCollide on the whole block.
+   std::function<void (IBlock *)> streamCollide(Type type)
+   {
+      if (type == Type::INNER)
+         return [this](IBlock* blk) { streamCollideInner(blk); };
+
+      if (type == Type::OUTER)
+         return [this](IBlock* blk) { streamCollideOuter(blk); };
+
+      return [this](IBlock* blk) { streamCollide(blk); };
+   }
+
+   // Chooses the streamCollide callable by `type`. Only the fallback (whole-block)
+   // callable forwards `ghost_layers`; the INNER and OUTER callables do not use it.
+   std::function<void (IBlock *)> streamCollide(Type type, const cell_idx_t ghost_layers)
+   {
+      if (type == Type::INNER)
+         return [this](IBlock* blk) { streamCollideInner(blk); };
+
+      if (type == Type::OUTER)
+         return [this](IBlock* blk) { streamCollideOuter(blk); };
+
+      return [this, ghost_layers](IBlock* blk) { streamCollide(blk, ghost_layers); };
+   }
+
+   // Chooses the streamCollide callable by `type`; each callable passes `gpuStream` on.
+   // `ghost_layers` only reaches the fallback (whole-block) callable.
+   std::function<void (IBlock *)> streamCollide(Type type, const cell_idx_t ghost_layers, gpuStream_t gpuStream)
+   {
+      if (type == Type::INNER)
+         return [this, gpuStream](IBlock* blk) { streamCollideInner(blk, gpuStream); };
+
+      if (type == Type::OUTER)
+         return [this, gpuStream](IBlock* blk) { streamCollideOuter(blk, gpuStream); };
+
+      return [this, ghost_layers, gpuStream](IBlock* blk) { streamCollide(blk, ghost_layers, gpuStream); };
+   }
+
+   // Chooses the streamCollide callable by `type` and passes `gpuStream` on; the
+   // fallback (whole-block) callable is invoked with cell_idx_c(0) ghost layers.
+   std::function<void (IBlock *)> streamCollide(Type type, gpuStream_t gpuStream)
+   {
+      if (type == Type::INNER)
+         return [this, gpuStream](IBlock* blk) { streamCollideInner(blk, gpuStream); };
+
+      if (type == Type::OUTER)
+         return [this, gpuStream](IBlock* blk) { streamCollideOuter(blk, gpuStream); };
+
+      return [this, gpuStream](IBlock* blk) { streamCollide(blk, cell_idx_c(0), gpuStream); };
+   }
+
+   // Whole-block stream-collide with zero ghost layers and a nullptr GPU stream.
+   void streamCollide(IBlock * block)
+   {
+      auto pdfs = block->getData< gpu::GPUField<double> >(pdfsID);
+      auto vel  = block->getData< gpu::GPUField<double> >(velID);
+
+      // Reuse the temporary field that cache_pdfs_.find(pdfs) yields; if there is none,
+      // create one with cloneUninitialized() and register it in the cache.
+      const auto cached = cache_pdfs_.find(pdfs);
+      gpu::GPUField<double> * scratch = nullptr;
+      if (cached == cache_pdfs_.end())
+      {
+         scratch = pdfs->cloneUninitialized();
+         cache_pdfs_.insert(scratch);
+      }
+      else
+      {
+         scratch = *cached;
+      }
+
+      // omegaVector is indexed by the level the block storage reports for this block.
+      const uint_t blockLevel = block->getBlockStorage().getLevel(*block);
+      const double omega = omegaVector[blockLevel];
+
+      // Call the raw-pointer overload, then exchange data pointers between pdfs and
+      // the temporary field.
+      streamCollide(pdfs, scratch, vel, omega, cell_idx_c(0), nullptr);
+      pdfs->swapDataPointers(scratch);
+   }
+
+   // Whole-block stream-collide that forwards `ghost_layers`; the GPU stream is nullptr.
+   void streamCollide(IBlock * block, const cell_idx_t ghost_layers)
+   {
+      auto pdfs = block->getData< gpu::GPUField<double> >(pdfsID);
+      auto vel  = block->getData< gpu::GPUField<double> >(velID);
+
+      // Reuse the temporary field that cache_pdfs_.find(pdfs) yields; if there is none,
+      // create one with cloneUninitialized() and register it in the cache.
+      const auto cached = cache_pdfs_.find(pdfs);
+      gpu::GPUField<double> * scratch = nullptr;
+      if (cached == cache_pdfs_.end())
+      {
+         scratch = pdfs->cloneUninitialized();
+         cache_pdfs_.insert(scratch);
+      }
+      else
+      {
+         scratch = *cached;
+      }
+
+      // omegaVector is indexed by the level the block storage reports for this block.
+      const uint_t blockLevel = block->getBlockStorage().getLevel(*block);
+      const double omega = omegaVector[blockLevel];
+
+      // Call the raw-pointer overload, then exchange data pointers with the temporary field.
+      streamCollide(pdfs, scratch, vel, omega, ghost_layers, nullptr);
+      pdfs->swapDataPointers(scratch);
+   }
+
+   // Whole-block stream-collide forwarding both `ghost_layers` and `gpuStream`.
+   void streamCollide(IBlock * block, const cell_idx_t ghost_layers, gpuStream_t gpuStream)
+   {
+      auto pdfs = block->getData< gpu::GPUField<double> >(pdfsID);
+      auto vel  = block->getData< gpu::GPUField<double> >(velID);
+
+      // Temporary field: taken from cache_pdfs_ when found, otherwise cloned and cached.
+      const auto cached = cache_pdfs_.find(pdfs);
+      gpu::GPUField<double> * scratch = nullptr;
+      if (cached == cache_pdfs_.end())
+      {
+         scratch = pdfs->cloneUninitialized();
+         cache_pdfs_.insert(scratch);
+      }
+      else
+      {
+         scratch = *cached;
+      }
+
+      const uint_t blockLevel = block->getBlockStorage().getLevel(*block);
+      const double omega = omegaVector[blockLevel];
+
+      // Call the raw-pointer overload, then exchange data pointers with the temporary field.
+      streamCollide(pdfs, scratch, vel, omega, ghost_layers, gpuStream);
+      pdfs->swapDataPointers(scratch);
+   }
+
+   // Stream-collide restricted to `ci`; data pointers are swapped after the call.
+   void streamCollideCellInterval(IBlock * block, const CellInterval & ci, gpuStream_t gpuStream = nullptr)
+   {
+      auto pdfs = block->getData< gpu::GPUField<double> >(pdfsID);
+      auto vel  = block->getData< gpu::GPUField<double> >(velID);
+
+      // Temporary field: taken from cache_pdfs_ when found, otherwise cloned and cached.
+      const auto cached = cache_pdfs_.find(pdfs);
+      gpu::GPUField<double> * scratch = nullptr;
+      if (cached == cache_pdfs_.end())
+      {
+         scratch = pdfs->cloneUninitialized();
+         cache_pdfs_.insert(scratch);
+      }
+      else
+      {
+         scratch = *cached;
+      }
+
+      const uint_t blockLevel = block->getBlockStorage().getLevel(*block);
+      const double omega = omegaVector[blockLevel];
+
+      // Call the raw-pointer overload, then exchange data pointers with the temporary field.
+      streamCollideCellInterval(pdfs, scratch, vel, omega, ci, gpuStream);
+      pdfs->swapDataPointers(scratch);
+   }
+
+   // Stream-collide on the interior interval only; this method does not swap data pointers.
+   void streamCollideInner(IBlock * block, gpuStream_t gpuStream = nullptr)
+   {
+      auto pdfs = block->getData< gpu::GPUField<double> >(pdfsID);
+      auto vel  = block->getData< gpu::GPUField<double> >(velID);
+
+      // Temporary field: taken from cache_pdfs_ when found, otherwise cloned and cached.
+      const auto cached = cache_pdfs_.find(pdfs);
+      gpu::GPUField<double> * scratch = nullptr;
+      if (cached == cache_pdfs_.end())
+      {
+         scratch = pdfs->cloneUninitialized();
+         cache_pdfs_.insert(scratch);
+      }
+      else
+      {
+         scratch = *cached;
+      }
+
+      const uint_t blockLevel = block->getBlockStorage().getLevel(*block);
+      const double omega = omegaVector[blockLevel];
+
+      // Interior = xyzSize() expanded by the negated outerWidth_ components.
+      CellInterval interior = pdfs->xyzSize();
+      interior.expand(Cell(-outerWidth_[0], -outerWidth_[1], -outerWidth_[2]));
+
+      streamCollideCellInterval(pdfs, scratch, vel, omega, interior, gpuStream);
+   }
+
+   // Runs streamCollideCellInterval on every interval in layers_, then swaps data pointers.
+   void streamCollideOuter(IBlock * block, gpuStream_t gpuStream = nullptr)
+   {
+      auto pdfs = block->getData< gpu::GPUField<double> >(pdfsID);
+      auto vel  = block->getData< gpu::GPUField<double> >(velID);
+
+      // Temporary field: taken from cache_pdfs_ when found, otherwise cloned and cached.
+      const auto cached = cache_pdfs_.find(pdfs);
+      gpu::GPUField<double> * scratch = nullptr;
+      if (cached == cache_pdfs_.end())
+      {
+         scratch = pdfs->cloneUninitialized();
+         cache_pdfs_.insert(scratch);
+      }
+      else
+      {
+         scratch = *cached;
+      }
+
+      const uint_t blockLevel = block->getBlockStorage().getLevel(*block);
+      const double omega = omegaVector[blockLevel];
+
+      // layers_ is filled only while it is empty, using this block's field for the slices.
+      if (layers_.empty())
+      {
+         CellInterval slab;
+
+         // stencil::T and stencil::B slices of width outerWidth_[2]
+         pdfs->getSliceBeforeGhostLayer(stencil::T, slab, outerWidth_[2], false);
+         layers_.push_back(slab);
+         pdfs->getSliceBeforeGhostLayer(stencil::B, slab, outerWidth_[2], false);
+         layers_.push_back(slab);
+
+         // stencil::N and stencil::S slices of width outerWidth_[1], expand()ed by -outerWidth_[2] in the third component
+         pdfs->getSliceBeforeGhostLayer(stencil::N, slab, outerWidth_[1], false);
+         slab.expand(Cell(0, 0, -outerWidth_[2]));
+         layers_.push_back(slab);
+         pdfs->getSliceBeforeGhostLayer(stencil::S, slab, outerWidth_[1], false);
+         slab.expand(Cell(0, 0, -outerWidth_[2]));
+         layers_.push_back(slab);
+
+         // stencil::E and stencil::W slices of width outerWidth_[0], expand()ed negatively in the second and third component
+         pdfs->getSliceBeforeGhostLayer(stencil::E, slab, outerWidth_[0], false);
+         slab.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+         layers_.push_back(slab);
+         pdfs->getSliceBeforeGhostLayer(stencil::W, slab, outerWidth_[0], false);
+         slab.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+         layers_.push_back(slab);
+      }
+
+      // NOTE(review): the callback's stream argument is unused; each call is handed gpuStream - confirm intent.
+      {
+         auto section = parallelStreams_.parallelSection( gpuStream );
+         for (auto & layer : layers_)
+         {
+            section.run([&]( auto ) {
+               streamCollideCellInterval(pdfs, scratch, vel, omega, layer, gpuStream);
+            });
+         }
+      }
+
+      pdfs->swapDataPointers(scratch);
+   }
+   
+
+   // Returns a callable that applies collide(IBlock*) to its argument; it holds `this`.
+   std::function<void (IBlock *)> collide() {
+      return [this](IBlock* blk) { this->collide(blk); };
+   }
+
+   // Chooses the collide callable by `type`: INNER -> collideInner,
+   // OUTER -> collideOuter, any other value -> collide on the whole block.
+   std::function<void (IBlock *)> collide(Type type)
+   {
+      if (type == Type::INNER)
+         return [this](IBlock* blk) { collideInner(blk); };
+
+      if (type == Type::OUTER)
+         return [this](IBlock* blk) { collideOuter(blk); };
+
+      return [this](IBlock* blk) { collide(blk); };
+   }
+
+   // Chooses the collide callable by `type`. Only the fallback (whole-block)
+   // callable forwards `ghost_layers`; the INNER and OUTER callables do not use it.
+   std::function<void (IBlock *)> collide(Type type, const cell_idx_t ghost_layers)
+   {
+      if (type == Type::INNER)
+         return [this](IBlock* blk) { collideInner(blk); };
+
+      if (type == Type::OUTER)
+         return [this](IBlock* blk) { collideOuter(blk); };
+
+      return [this, ghost_layers](IBlock* blk) { collide(blk, ghost_layers); };
+   }
+
+   // Chooses the collide callable by `type`; each callable passes `gpuStream` on.
+   // `ghost_layers` only reaches the fallback (whole-block) callable.
+   std::function<void (IBlock *)> collide(Type type, const cell_idx_t ghost_layers, gpuStream_t gpuStream)
+   {
+      if (type == Type::INNER)
+         return [this, gpuStream](IBlock* blk) { collideInner(blk, gpuStream); };
+
+      if (type == Type::OUTER)
+         return [this, gpuStream](IBlock* blk) { collideOuter(blk, gpuStream); };
+
+      return [this, ghost_layers, gpuStream](IBlock* blk) { collide(blk, ghost_layers, gpuStream); };
+   }
+
+   // Chooses the collide callable by `type` and passes `gpuStream` on; the
+   // fallback (whole-block) callable is invoked with cell_idx_c(0) ghost layers.
+   std::function<void (IBlock *)> collide(Type type, gpuStream_t gpuStream)
+   {
+      if (type == Type::INNER)
+         return [this, gpuStream](IBlock* blk) { collideInner(blk, gpuStream); };
+
+      if (type == Type::OUTER)
+         return [this, gpuStream](IBlock* blk) { collideOuter(blk, gpuStream); };
+
+      return [this, gpuStream](IBlock* blk) { collide(blk, cell_idx_c(0), gpuStream); };
+   }
+
+   // Whole-block collide with zero ghost layers and a nullptr GPU stream.
+   // Works on pdfs and vel directly; no temporary field is involved.
+   void collide(IBlock * block)
+   {
+      auto pdfs = block->getData< gpu::GPUField<double> >(pdfsID);
+      auto vel  = block->getData< gpu::GPUField<double> >(velID);
+
+      // omegaVector is indexed by the level the block storage reports for this block.
+      const uint_t blockLevel = block->getBlockStorage().getLevel(*block);
+      const double omega = omegaVector[blockLevel];
+
+      // Delegate to the raw-pointer overload.
+      collide(pdfs, vel, omega, cell_idx_c(0), nullptr);
+   }
+
+   // Whole-block collide that forwards `ghost_layers`; the GPU stream is nullptr.
+   void collide(IBlock * block, const cell_idx_t ghost_layers)
+   {
+      auto pdfs = block->getData< gpu::GPUField<double> >(pdfsID);
+      auto vel  = block->getData< gpu::GPUField<double> >(velID);
+
+      // omegaVector is indexed by the level the block storage reports for this block.
+      const uint_t blockLevel = block->getBlockStorage().getLevel(*block);
+      const double omega = omegaVector[blockLevel];
+
+      // Delegate to the raw-pointer overload.
+      collide(pdfs, vel, omega, ghost_layers, nullptr);
+   }
+
+   // Whole-block collide forwarding both `ghost_layers` and `gpuStream`.
+   void collide(IBlock * block, const cell_idx_t ghost_layers, gpuStream_t gpuStream)
+   {
+      auto pdfs = block->getData< gpu::GPUField<double> >(pdfsID);
+      auto vel  = block->getData< gpu::GPUField<double> >(velID);
+
+      const uint_t blockLevel = block->getBlockStorage().getLevel(*block);
+      const double omega = omegaVector[blockLevel];
+
+      collide(pdfs, vel, omega, ghost_layers, gpuStream);
+   }
+
+   // Collide restricted to `ci`, with omega taken from omegaVector at the block's level.
+   void collideCellInterval(IBlock * block, const CellInterval & ci, gpuStream_t gpuStream = nullptr)
+   {
+      auto pdfs = block->getData< gpu::GPUField<double> >(pdfsID);
+      auto vel  = block->getData< gpu::GPUField<double> >(velID);
+
+      const uint_t blockLevel = block->getBlockStorage().getLevel(*block);
+      const double omega = omegaVector[blockLevel];
+
+      collideCellInterval(pdfs, vel, omega, ci, gpuStream);
+   }
+
+   // Collide on xyzSize() expanded by the negated outerWidth_ components (the interior).
+   void collideInner(IBlock * block, gpuStream_t gpuStream = nullptr)
+   {
+      auto pdfs = block->getData< gpu::GPUField<double> >(pdfsID);
+      auto vel  = block->getData< gpu::GPUField<double> >(velID);
+
+      const uint_t blockLevel = block->getBlockStorage().getLevel(*block);
+      const double omega = omegaVector[blockLevel];
+
+      CellInterval interior = pdfs->xyzSize();
+      interior.expand(Cell(-outerWidth_[0], -outerWidth_[1], -outerWidth_[2]));
+
+      collideCellInterval(pdfs, vel, omega, interior, gpuStream);
+   }
+
+   // Runs collideCellInterval on every interval in layers_; no temporary field, no swap.
+   void collideOuter(IBlock * block, gpuStream_t gpuStream = nullptr)
+   {
+      auto pdfs = block->getData< gpu::GPUField<double> >(pdfsID);
+      auto vel  = block->getData< gpu::GPUField<double> >(velID);
+
+      const uint_t blockLevel = block->getBlockStorage().getLevel(*block);
+      const double omega = omegaVector[blockLevel];
+
+      // layers_ is filled only while it is empty, using this block's field for the slices.
+      if (layers_.empty())
+      {
+         CellInterval slab;
+
+         // stencil::T and stencil::B slices of width outerWidth_[2]
+         pdfs->getSliceBeforeGhostLayer(stencil::T, slab, outerWidth_[2], false);
+         layers_.push_back(slab);
+         pdfs->getSliceBeforeGhostLayer(stencil::B, slab, outerWidth_[2], false);
+         layers_.push_back(slab);
+
+         // stencil::N and stencil::S slices of width outerWidth_[1], expand()ed by -outerWidth_[2] in the third component
+         pdfs->getSliceBeforeGhostLayer(stencil::N, slab, outerWidth_[1], false);
+         slab.expand(Cell(0, 0, -outerWidth_[2]));
+         layers_.push_back(slab);
+         pdfs->getSliceBeforeGhostLayer(stencil::S, slab, outerWidth_[1], false);
+         slab.expand(Cell(0, 0, -outerWidth_[2]));
+         layers_.push_back(slab);
+
+         // stencil::E and stencil::W slices of width outerWidth_[0], expand()ed negatively in the second and third component
+         pdfs->getSliceBeforeGhostLayer(stencil::E, slab, outerWidth_[0], false);
+         slab.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+         layers_.push_back(slab);
+         pdfs->getSliceBeforeGhostLayer(stencil::W, slab, outerWidth_[0], false);
+         slab.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+         layers_.push_back(slab);
+      }
+
+      // NOTE(review): the callback's stream argument is unused; each call is handed gpuStream - confirm intent.
+      {
+         auto section = parallelStreams_.parallelSection( gpuStream );
+         for (auto & layer : layers_)
+         {
+            section.run([&]( auto ) {
+               collideCellInterval(pdfs, vel, omega, layer, gpuStream);
+            });
+         }
+      }
+   }
+   
+
+   // Returns a callable that applies stream(IBlock*) to its argument; it holds `this`.
+   std::function<void (IBlock *)> stream() {
+      return [this](IBlock* blk) { this->stream(blk); };
+   }
+
+   // Chooses the stream callable by `type`: INNER -> streamInner,
+   // OUTER -> streamOuter, any other value -> stream on the whole block.
+   std::function<void (IBlock *)> stream(Type type)
+   {
+      if (type == Type::INNER)
+         return [this](IBlock* blk) { streamInner(blk); };
+
+      if (type == Type::OUTER)
+         return [this](IBlock* blk) { streamOuter(blk); };
+
+      return [this](IBlock* blk) { stream(blk); };
+   }
+
+   // Chooses the stream callable by `type`. Only the fallback (whole-block)
+   // callable forwards `ghost_layers`; the INNER and OUTER callables do not use it.
+   std::function<void (IBlock *)> stream(Type type, const cell_idx_t ghost_layers)
+   {
+      if (type == Type::INNER)
+         return [this](IBlock* blk) { streamInner(blk); };
+
+      if (type == Type::OUTER)
+         return [this](IBlock* blk) { streamOuter(blk); };
+
+      return [this, ghost_layers](IBlock* blk) { stream(blk, ghost_layers); };
+   }
+
+   // Chooses the stream callable by `type`; each callable passes `gpuStream` on.
+   // `ghost_layers` only reaches the fallback (whole-block) callable.
+   std::function<void (IBlock *)> stream(Type type, const cell_idx_t ghost_layers, gpuStream_t gpuStream)
+   {
+      if (type == Type::INNER)
+         return [this, gpuStream](IBlock* blk) { streamInner(blk, gpuStream); };
+
+      if (type == Type::OUTER)
+         return [this, gpuStream](IBlock* blk) { streamOuter(blk, gpuStream); };
+
+      return [this, ghost_layers, gpuStream](IBlock* blk) { stream(blk, ghost_layers, gpuStream); };
+   }
+
+   // Chooses the stream callable by `type` and passes `gpuStream` on; the
+   // fallback (whole-block) callable is invoked with cell_idx_c(0) ghost layers.
+   std::function<void (IBlock *)> stream(Type type, gpuStream_t gpuStream)
+   {
+      if (type == Type::INNER)
+         return [this, gpuStream](IBlock* blk) { streamInner(blk, gpuStream); };
+
+      if (type == Type::OUTER)
+         return [this, gpuStream](IBlock* blk) { streamOuter(blk, gpuStream); };
+
+      return [this, gpuStream](IBlock* blk) { stream(blk, cell_idx_c(0), gpuStream); };
+   }
+
+   // Whole-block stream step with zero ghost layers and a nullptr GPU stream.
+   // Data pointers of pdfs and the temporary field are exchanged afterwards.
+   void stream(IBlock * block)
+   {
+      const cell_idx_t noGhostLayers = 0;
+      gpuStream_t defaultStream = nullptr;
+
+      auto pdfs = block->getData< gpu::GPUField<double> >(pdfsID);
+
+      // Reuse the temporary field that cache_pdfs_.find(pdfs) yields; if there is none,
+      // create one with cloneUninitialized() and register it in the cache.
+      const auto cached = cache_pdfs_.find(pdfs);
+      gpu::GPUField<double> * scratch = nullptr;
+      if (cached == cache_pdfs_.end())
+      {
+         scratch = pdfs->cloneUninitialized();
+         cache_pdfs_.insert(scratch);
+      }
+      else
+      {
+         scratch = *cached;
+      }
+
+      // Call the raw-pointer overload, then exchange data pointers with the temporary field.
+      stream(pdfs, scratch, noGhostLayers, defaultStream);
+      pdfs->swapDataPointers(scratch);
+   }
+
+   // Whole-block stream step that forwards `ghost_layers`; the GPU stream is nullptr.
+   // Data pointers of pdfs and the temporary field are exchanged afterwards.
+   void stream(IBlock * block, const cell_idx_t ghost_layers)
+   {
+      gpuStream_t defaultStream = nullptr;
+
+      auto pdfs = block->getData< gpu::GPUField<double> >(pdfsID);
+
+      // Reuse the temporary field that cache_pdfs_.find(pdfs) yields; if there is none,
+      // create one with cloneUninitialized() and register it in the cache.
+      const auto cached = cache_pdfs_.find(pdfs);
+      gpu::GPUField<double> * scratch = nullptr;
+      if (cached == cache_pdfs_.end())
+      {
+         scratch = pdfs->cloneUninitialized();
+         cache_pdfs_.insert(scratch);
+      }
+      else
+      {
+         scratch = *cached;
+      }
+
+      // Call the raw-pointer overload, then exchange data pointers with the temporary field.
+      stream(pdfs, scratch, ghost_layers, defaultStream);
+      pdfs->swapDataPointers(scratch);
+   }
+
+   // Whole-block stream step forwarding both `ghost_layers` and `gpuStream`.
+   // Data pointers of pdfs and the temporary field are exchanged afterwards.
+   void stream(IBlock * block, const cell_idx_t ghost_layers, gpuStream_t gpuStream)
+   {
+      auto pdfs = block->getData< gpu::GPUField<double> >(pdfsID);
+
+      // Reuse the temporary field that cache_pdfs_.find(pdfs) yields; if there is none,
+      // create one with cloneUninitialized() and register it in the cache.
+      const auto cached = cache_pdfs_.find(pdfs);
+      gpu::GPUField<double> * scratch = nullptr;
+      if (cached == cache_pdfs_.end())
+      {
+         scratch = pdfs->cloneUninitialized();
+         cache_pdfs_.insert(scratch);
+      }
+      else
+      {
+         scratch = *cached;
+      }
+
+      // Call the raw-pointer overload, then exchange data pointers with the temporary field.
+      stream(pdfs, scratch, ghost_layers, gpuStream);
+      pdfs->swapDataPointers(scratch);
+   }
+
+   // Stream step restricted to `ci`.
+   // Data pointers of pdfs and the temporary field are exchanged afterwards.
+   void streamCellInterval(IBlock * block, const CellInterval & ci, gpuStream_t gpuStream = nullptr)
+   {
+      auto pdfs = block->getData< gpu::GPUField<double> >(pdfsID);
+
+      // Reuse the temporary field that cache_pdfs_.find(pdfs) yields; if there is none,
+      // create one with cloneUninitialized() and register it in the cache.
+      const auto cached = cache_pdfs_.find(pdfs);
+      gpu::GPUField<double> * scratch = nullptr;
+      if (cached == cache_pdfs_.end())
+      {
+         scratch = pdfs->cloneUninitialized();
+         cache_pdfs_.insert(scratch);
+      }
+      else
+      {
+         scratch = *cached;
+      }
+
+      // Call the raw-pointer overload, then exchange data pointers with the temporary field.
+      streamCellInterval(pdfs, scratch, ci, gpuStream);
+      pdfs->swapDataPointers(scratch);
+   }
+
+   // Stream step on the interior interval only.
+   // This method does not swap data pointers.
+   void streamInner(IBlock * block, gpuStream_t gpuStream = nullptr)
+   {
+      auto pdfs = block->getData< gpu::GPUField<double> >(pdfsID);
+
+      // Reuse the temporary field that cache_pdfs_.find(pdfs) yields; if there is none,
+      // create one with cloneUninitialized() and register it in the cache.
+      const auto cached = cache_pdfs_.find(pdfs);
+      gpu::GPUField<double> * scratch = nullptr;
+      if (cached == cache_pdfs_.end())
+      {
+         scratch = pdfs->cloneUninitialized();
+         cache_pdfs_.insert(scratch);
+      }
+      else
+      {
+         scratch = *cached;
+      }
+
+      // Interior = xyzSize() expanded by the negated outerWidth_ components.
+      CellInterval interior = pdfs->xyzSize();
+      interior.expand(Cell(-outerWidth_[0], -outerWidth_[1], -outerWidth_[2]));
+
+      streamCellInterval(pdfs, scratch, interior, gpuStream);
+   }
+
+   // Runs streamCellInterval on every interval in layers_ and then exchanges the
+   // data pointers of pdfs and the temporary field.
+   void streamOuter(IBlock * block, gpuStream_t gpuStream = nullptr)
+   {
+      auto pdfs = block->getData< gpu::GPUField<double> >(pdfsID);
+
+      // Reuse the temporary field that cache_pdfs_.find(pdfs) yields; if there is none,
+      // create one with cloneUninitialized() and register it in the cache.
+      const auto cached = cache_pdfs_.find(pdfs);
+      gpu::GPUField<double> * scratch = nullptr;
+      if (cached == cache_pdfs_.end())
+      {
+         scratch = pdfs->cloneUninitialized();
+         cache_pdfs_.insert(scratch);
+      }
+      else
+      {
+         scratch = *cached;
+      }
+
+      // layers_ is filled only while it is empty, using this block's field for the slices.
+      if (layers_.empty())
+      {
+         CellInterval slab;
+
+         // stencil::T and stencil::B slices of width outerWidth_[2]
+         pdfs->getSliceBeforeGhostLayer(stencil::T, slab, outerWidth_[2], false);
+         layers_.push_back(slab);
+         pdfs->getSliceBeforeGhostLayer(stencil::B, slab, outerWidth_[2], false);
+         layers_.push_back(slab);
+
+         // stencil::N and stencil::S slices of width outerWidth_[1], expand()ed by -outerWidth_[2] in the third component
+         pdfs->getSliceBeforeGhostLayer(stencil::N, slab, outerWidth_[1], false);
+         slab.expand(Cell(0, 0, -outerWidth_[2]));
+         layers_.push_back(slab);
+         pdfs->getSliceBeforeGhostLayer(stencil::S, slab, outerWidth_[1], false);
+         slab.expand(Cell(0, 0, -outerWidth_[2]));
+         layers_.push_back(slab);
+
+         // stencil::E and stencil::W slices of width outerWidth_[0], expand()ed negatively in the second and third component
+         pdfs->getSliceBeforeGhostLayer(stencil::E, slab, outerWidth_[0], false);
+         slab.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+         layers_.push_back(slab);
+         pdfs->getSliceBeforeGhostLayer(stencil::W, slab, outerWidth_[0], false);
+         slab.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+         layers_.push_back(slab);
+      }
+
+      // NOTE(review): the callback's stream argument is unused; each call is handed gpuStream - confirm intent.
+      {
+         auto section = parallelStreams_.parallelSection( gpuStream );
+         for (auto & layer : layers_)
+         {
+            section.run([&]( auto ) {
+               streamCellInterval(pdfs, scratch, layer, gpuStream);
+            });
+         }
+      }
+
+      pdfs->swapDataPointers(scratch);
+   }
+   
+
+   // Returns a callable that applies streamOnlyNoAdvancement(IBlock*) to its argument; it holds `this`.
+   std::function<void (IBlock *)> streamOnlyNoAdvancement() {
+      return [this](IBlock* blk) { this->streamOnlyNoAdvancement(blk); };
+   }
+
+   // Chooses the callable by `type`: INNER -> streamOnlyNoAdvancementInner,
+   // OUTER -> streamOnlyNoAdvancementOuter, any other value -> the whole-block variant.
+   std::function<void (IBlock *)> streamOnlyNoAdvancement(Type type)
+   {
+      if (type == Type::INNER)
+         return [this](IBlock* blk) { streamOnlyNoAdvancementInner(blk); };
+
+      if (type == Type::OUTER)
+         return [this](IBlock* blk) { streamOnlyNoAdvancementOuter(blk); };
+
+      return [this](IBlock* blk) { streamOnlyNoAdvancement(blk); };
+   }
+
+   // Chooses the streamOnlyNoAdvancement callable by `type`. Only the fallback (whole-block)
+   // callable forwards `ghost_layers`; the INNER and OUTER callables do not use it.
+   std::function<void (IBlock *)> streamOnlyNoAdvancement(Type type, const cell_idx_t ghost_layers)
+   {
+      if (type == Type::INNER)
+         return [this](IBlock* blk) { streamOnlyNoAdvancementInner(blk); };
+
+      if (type == Type::OUTER)
+         return [this](IBlock* blk) { streamOnlyNoAdvancementOuter(blk); };
+
+      return [this, ghost_layers](IBlock* blk) { streamOnlyNoAdvancement(blk, ghost_layers); };
+   }
+
+   // Chooses the streamOnlyNoAdvancement callable by `type`; each callable passes `gpuStream` on.
+   // `ghost_layers` only reaches the fallback (whole-block) callable.
+   std::function<void (IBlock *)> streamOnlyNoAdvancement(Type type, const cell_idx_t ghost_layers, gpuStream_t gpuStream)
+   {
+      if (type == Type::INNER)
+         return [this, gpuStream](IBlock* blk) { streamOnlyNoAdvancementInner(blk, gpuStream); };
+
+      if (type == Type::OUTER)
+         return [this, gpuStream](IBlock* blk) { streamOnlyNoAdvancementOuter(blk, gpuStream); };
+
+      return [this, ghost_layers, gpuStream](IBlock* blk) { streamOnlyNoAdvancement(blk, ghost_layers, gpuStream); };
+   }
+
+   // Chooses the streamOnlyNoAdvancement callable by `type` and passes `gpuStream` on; the
+   // fallback (whole-block) callable is invoked with cell_idx_c(0) ghost layers.
+   std::function<void (IBlock *)> streamOnlyNoAdvancement(Type type, gpuStream_t gpuStream)
+   {
+      if (type == Type::INNER)
+         return [this, gpuStream](IBlock* blk) { streamOnlyNoAdvancementInner(blk, gpuStream); };
+
+      if (type == Type::OUTER)
+         return [this, gpuStream](IBlock* blk) { streamOnlyNoAdvancementOuter(blk, gpuStream); };
+
+      return [this, gpuStream](IBlock* blk) { streamOnlyNoAdvancement(blk, cell_idx_c(0), gpuStream); };
+   }
+
+   // Whole-block streamOnlyNoAdvancement with zero ghost layers and a nullptr GPU stream.
+   // Unlike stream(IBlock*), this method does not swap data pointers afterwards.
+   void streamOnlyNoAdvancement(IBlock * block)
+   {
+      const cell_idx_t noGhostLayers = 0;
+      gpuStream_t defaultStream = nullptr;
+
+      auto pdfs = block->getData< gpu::GPUField<double> >(pdfsID);
+
+      // Reuse the temporary field that cache_pdfs_.find(pdfs) yields; if there is none,
+      // create one with cloneUninitialized() and register it in the cache.
+      const auto cached = cache_pdfs_.find(pdfs);
+      gpu::GPUField<double> * scratch = nullptr;
+      if (cached == cache_pdfs_.end())
+      {
+         scratch = pdfs->cloneUninitialized();
+         cache_pdfs_.insert(scratch);
+      }
+      else
+      {
+         scratch = *cached;
+      }
+
+      // Delegate to the raw-pointer overload.
+      streamOnlyNoAdvancement(pdfs, scratch, noGhostLayers, defaultStream);
+   }
+
+   // Whole-block streamOnlyNoAdvancement that forwards `ghost_layers`; the GPU stream
+   // is nullptr. This method does not swap data pointers afterwards.
+   void streamOnlyNoAdvancement(IBlock * block, const cell_idx_t ghost_layers)
+   {
+      gpuStream_t defaultStream = nullptr;
+
+      auto pdfs = block->getData< gpu::GPUField<double> >(pdfsID);
+
+      // Reuse the temporary field that cache_pdfs_.find(pdfs) yields; if there is none,
+      // create one with cloneUninitialized() and register it in the cache.
+      const auto cached = cache_pdfs_.find(pdfs);
+      gpu::GPUField<double> * scratch = nullptr;
+      if (cached == cache_pdfs_.end())
+      {
+         scratch = pdfs->cloneUninitialized();
+         cache_pdfs_.insert(scratch);
+      }
+      else
+      {
+         scratch = *cached;
+      }
+
+      // Delegate to the raw-pointer overload.
+      streamOnlyNoAdvancement(pdfs, scratch, ghost_layers, defaultStream);
+   }
+
+   // Whole-block streamOnlyNoAdvancement forwarding both `ghost_layers` and `gpuStream`.
+   // This method does not swap data pointers afterwards.
+   void streamOnlyNoAdvancement(IBlock * block, const cell_idx_t ghost_layers, gpuStream_t gpuStream)
+   {
+      auto pdfs = block->getData< gpu::GPUField<double> >(pdfsID);
+
+      // Reuse the temporary field that cache_pdfs_.find(pdfs) yields; if there is none,
+      // create one with cloneUninitialized() and register it in the cache.
+      const auto cached = cache_pdfs_.find(pdfs);
+      gpu::GPUField<double> * scratch = nullptr;
+      if (cached == cache_pdfs_.end())
+      {
+         scratch = pdfs->cloneUninitialized();
+         cache_pdfs_.insert(scratch);
+      }
+      else
+      {
+         scratch = *cached;
+      }
+
+      // Delegate to the raw-pointer overload.
+      streamOnlyNoAdvancement(pdfs, scratch, ghost_layers, gpuStream);
+   }
+
+   // streamOnlyNoAdvancement restricted to `ci`.
+   // This method does not swap data pointers afterwards.
+   void streamOnlyNoAdvancementCellInterval(IBlock * block, const CellInterval & ci, gpuStream_t gpuStream = nullptr)
+   {
+      auto pdfs = block->getData< gpu::GPUField<double> >(pdfsID);
+
+      // Reuse the temporary field that cache_pdfs_.find(pdfs) yields; if there is none,
+      // create one with cloneUninitialized() and register it in the cache.
+      const auto cached = cache_pdfs_.find(pdfs);
+      gpu::GPUField<double> * scratch = nullptr;
+      if (cached == cache_pdfs_.end())
+      {
+         scratch = pdfs->cloneUninitialized();
+         cache_pdfs_.insert(scratch);
+      }
+      else
+      {
+         scratch = *cached;
+      }
+
+      // Delegate to the raw-pointer overload.
+      streamOnlyNoAdvancementCellInterval(pdfs, scratch, ci, gpuStream);
+   }
+
+   // streamOnlyNoAdvancement on the interior interval only.
+   // This method does not swap data pointers.
+   void streamOnlyNoAdvancementInner(IBlock * block, gpuStream_t gpuStream = nullptr)
+   {
+      auto pdfs = block->getData< gpu::GPUField<double> >(pdfsID);
+
+      // Reuse the temporary field that cache_pdfs_.find(pdfs) yields; if there is none,
+      // create one with cloneUninitialized() and register it in the cache.
+      const auto cached = cache_pdfs_.find(pdfs);
+      gpu::GPUField<double> * scratch = nullptr;
+      if (cached == cache_pdfs_.end())
+      {
+         scratch = pdfs->cloneUninitialized();
+         cache_pdfs_.insert(scratch);
+      }
+      else
+      {
+         scratch = *cached;
+      }
+
+      // Interior = xyzSize() expanded by the negated outerWidth_ components.
+      CellInterval interior = pdfs->xyzSize();
+      interior.expand(Cell(-outerWidth_[0], -outerWidth_[1], -outerWidth_[2]));
+
+      streamOnlyNoAdvancementCellInterval(pdfs, scratch, interior, gpuStream);
+   }
+
+   // Runs streamOnlyNoAdvancementCellInterval on every interval in layers_.
+   // The temporary field is looked up (or created) exactly as in the other stream methods.
+   void streamOnlyNoAdvancementOuter(IBlock * block, gpuStream_t gpuStream = nullptr)
+   {
+      auto pdfs = block->getData< gpu::GPUField<double> >(pdfsID);
+
+      // Reuse the temporary field that cache_pdfs_.find(pdfs) yields; if there is none,
+      // create one with cloneUninitialized() and register it in the cache.
+      const auto cached = cache_pdfs_.find(pdfs);
+      gpu::GPUField<double> * scratch = nullptr;
+      if (cached == cache_pdfs_.end())
+      {
+         scratch = pdfs->cloneUninitialized();
+         cache_pdfs_.insert(scratch);
+      }
+      else
+      {
+         scratch = *cached;
+      }
+
+      // layers_ is filled only while it is empty, using this block's field for the slices.
+      if (layers_.empty())
+      {
+         CellInterval slab;
+
+         // stencil::T and stencil::B slices of width outerWidth_[2]
+         pdfs->getSliceBeforeGhostLayer(stencil::T, slab, outerWidth_[2], false);
+         layers_.push_back(slab);
+         pdfs->getSliceBeforeGhostLayer(stencil::B, slab, outerWidth_[2], false);
+         layers_.push_back(slab);
+
+         // stencil::N and stencil::S slices of width outerWidth_[1], expand()ed by -outerWidth_[2] in the third component
+         pdfs->getSliceBeforeGhostLayer(stencil::N, slab, outerWidth_[1], false);
+         slab.expand(Cell(0, 0, -outerWidth_[2]));
+         layers_.push_back(slab);
+         pdfs->getSliceBeforeGhostLayer(stencil::S, slab, outerWidth_[1], false);
+         slab.expand(Cell(0, 0, -outerWidth_[2]));
+         layers_.push_back(slab);
+
+         // stencil::E and stencil::W slices of width outerWidth_[0], expand()ed negatively in the second and third component
+         pdfs->getSliceBeforeGhostLayer(stencil::E, slab, outerWidth_[0], false);
+         slab.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+         layers_.push_back(slab);
+         pdfs->getSliceBeforeGhostLayer(stencil::W, slab, outerWidth_[0], false);
+         slab.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+         layers_.push_back(slab);
+      }
+
+      // NOTE(review): the callback's stream argument is unused; each call is handed gpuStream - confirm intent.
+      {
+         auto section = parallelStreams_.parallelSection( gpuStream );
+         for (auto & layer : layers_)
+         {
+            section.run([&]( auto ) {
+               streamOnlyNoAdvancementCellInterval(pdfs, scratch, layer, gpuStream);
+            });
+         }
+      }
+      // No data-pointer swap is performed in this method.
+   }
+   
+
+   // Returns a callable that applies initialise(IBlock*) to its argument; it holds `this`.
+   std::function<void (IBlock *)> initialise() {
+      return [this](IBlock* blk) { this->initialise(blk); };
+   }
+
+   // Chooses the initialise callable by `type`: INNER -> initialiseInner,
+   // OUTER -> initialiseOuter, any other value -> initialise on the whole block.
+   std::function<void (IBlock *)> initialise(Type type)
+   {
+      if (type == Type::INNER)
+         return [this](IBlock* blk) { initialiseInner(blk); };
+
+      if (type == Type::OUTER)
+         return [this](IBlock* blk) { initialiseOuter(blk); };
+
+      return [this](IBlock* blk) { initialise(blk); };
+   }
+
+   // Chooses the initialise callable by `type`. Only the fallback (whole-block)
+   // callable forwards `ghost_layers`; the INNER and OUTER callables do not use it.
+   std::function<void (IBlock *)> initialise(Type type, const cell_idx_t ghost_layers)
+   {
+      if (type == Type::INNER)
+         return [this](IBlock* blk) { initialiseInner(blk); };
+
+      if (type == Type::OUTER)
+         return [this](IBlock* blk) { initialiseOuter(blk); };
+
+      return [this, ghost_layers](IBlock* blk) { initialise(blk, ghost_layers); };
+   }
+
+   /// Like `initialise(Type, ghost_layers)`, additionally forwarding `gpuStream` to the selected sweep.
+   std::function<void (IBlock *)> initialise(Type type, const cell_idx_t ghost_layers, gpuStream_t gpuStream)
+   {
+      // `ghost_layers` only reaches the whole-block sweep; INNER and OUTER receive the stream alone.
+      if (type == Type::INNER)
+         return [this, gpuStream](IBlock* b) { initialiseInner(b, gpuStream); };
+
+      if (type == Type::OUTER)
+         return [this, gpuStream](IBlock* b) { initialiseOuter(b, gpuStream); };
+
+      return [this, ghost_layers, gpuStream](IBlock* b) { initialise(b, ghost_layers, gpuStream); };
+   }
+
+   /// Like `initialise(Type)`, additionally forwarding `gpuStream` to the selected sweep.
+   std::function<void (IBlock *)> initialise(Type type, gpuStream_t gpuStream)
+   {
+      // Without an explicit ghost-layer count the whole-block sweep is called with zero ghost layers.
+      if (type == Type::INNER)
+         return [this, gpuStream](IBlock* b) { initialiseInner(b, gpuStream); };
+
+      if (type == Type::OUTER)
+         return [this, gpuStream](IBlock* b) { initialiseOuter(b, gpuStream); };
+
+      return [this, gpuStream](IBlock* b) { initialise(b, cell_idx_c(0), gpuStream); };
+   }
+
+   /// Runs `initialise` on `block` with zero ghost layers and a `nullptr` stream.
+   void initialise(IBlock * block)
+   {
+      // Look up the fields stored on this block under pdfsID and velID.
+      auto * pdfField = block->getData< gpu::GPUField<double> >(pdfsID);
+      auto * velField = block->getData< gpu::GPUField<double> >(velID);
+
+      // Fixed arguments of this overload.
+      const cell_idx_t noGhostLayers = 0;
+      gpuStream_t nullStream = nullptr;
+
+      initialise(pdfField, velField, noGhostLayers, nullStream);
+   }
+
+   /// Runs `initialise` on `block` with `ghost_layers` and a `nullptr` stream.
+   void initialise(IBlock * block, const cell_idx_t ghost_layers)
+   {
+      // Look up the fields stored on this block under pdfsID and velID.
+      auto * pdfField = block->getData< gpu::GPUField<double> >(pdfsID);
+      auto * velField = block->getData< gpu::GPUField<double> >(velID);
+
+      // This overload has no stream parameter, so a nullptr stream is passed on.
+      gpuStream_t nullStream = nullptr;
+
+      initialise(pdfField, velField, ghost_layers, nullStream);
+   }
+
+   /// Runs `initialise` on `block`, passing `ghost_layers` and `gpuStream` through unchanged.
+   void initialise(IBlock * block, const cell_idx_t ghost_layers, gpuStream_t gpuStream)
+   {
+      // Look up the fields stored on this block under pdfsID and velID.
+      auto * pdfField = block->getData< gpu::GPUField<double> >(pdfsID);
+      auto * velField = block->getData< gpu::GPUField<double> >(velID);
+
+      // Delegate to the overload that works directly on the fields.
+      initialise(pdfField, velField, ghost_layers, gpuStream);
+   }
+
+   /// Runs `initialise` on `block` restricted to the cell interval `ci`, forwarding `gpuStream`.
+   void initialiseCellInterval(IBlock * block, const CellInterval & ci, gpuStream_t gpuStream = nullptr)
+   {
+      // Look up the fields stored on this block under pdfsID and velID.
+      auto * pdfField = block->getData< gpu::GPUField<double> >(pdfsID);
+      auto * velField = block->getData< gpu::GPUField<double> >(velID);
+
+      // Delegate to the overload that works directly on the fields.
+      initialiseCellInterval(pdfField, velField, ci, gpuStream);
+   }
+
+   /// Runs `initialise` on the part of `block` left after applying `outerWidth_` to the field extent.
+   void initialiseInner(IBlock * block, gpuStream_t gpuStream = nullptr)
+   {
+      auto * pdfField = block->getData< gpu::GPUField<double> >(pdfsID);
+      auto * velField = block->getData< gpu::GPUField<double> >(velID);
+
+      // Expand by the negated outer width; presumably this shrinks the interval on every axis — confirm.
+      const Cell shrinkBy(-outerWidth_[0], -outerWidth_[1], -outerWidth_[2]);
+      CellInterval interior = pdfField->xyzSize();
+      interior.expand(shrinkBy);
+
+      initialiseCellInterval(pdfField, velField, interior, gpuStream);
+   }
+
+   /**
+    * Runs `initialise` on six slices of `block` (T, B, N, S, E, W) whose widths are
+    * taken from `outerWidth_`. One call is issued per slice through a parallel section.
+    *
+    * \param block      block whose pdfs / vel fields are processed
+    * \param gpuStream  stream the parallel section is created from
+    */
+   void initialiseOuter(IBlock * block, gpuStream_t gpuStream = nullptr)
+   {
+      auto pdfs = block->getData< gpu::GPUField<double> >(pdfsID);
+      auto vel = block->getData< gpu::GPUField<double> >(velID);
+
+      // The slices are computed only while layers_ is empty and are reused afterwards.
+      if( layers_.empty() )
+      {
+         CellInterval ci;
+
+         // Top and bottom slices are stored as returned.
+         pdfs->getSliceBeforeGhostLayer(stencil::T, ci, outerWidth_[2], false);
+         layers_.push_back(ci);
+         pdfs->getSliceBeforeGhostLayer(stencil::B, ci, outerWidth_[2], false);
+         layers_.push_back(ci);
+
+         // North and south slices are adjusted in z (presumably to avoid overlap with T/B).
+         pdfs->getSliceBeforeGhostLayer(stencil::N, ci, outerWidth_[1], false);
+         ci.expand(Cell(0, 0, -outerWidth_[2]));
+         layers_.push_back(ci);
+         pdfs->getSliceBeforeGhostLayer(stencil::S, ci, outerWidth_[1], false);
+         ci.expand(Cell(0, 0, -outerWidth_[2]));
+         layers_.push_back(ci);
+
+         // East and west slices are adjusted in y and z.
+         pdfs->getSliceBeforeGhostLayer(stencil::E, ci, outerWidth_[0], false);
+         ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+         layers_.push_back(ci);
+         pdfs->getSliceBeforeGhostLayer(stencil::W, ci, outerWidth_[0], false);
+         ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+         layers_.push_back(ci);
+      }
+
+      // Each slice is issued on the stream `s` that the parallel section hands to the
+      // callback. Passing `gpuStream` instead would ignore that stream and leave the
+      // lambda parameter unused.
+      auto parallelSection_ = parallelStreams_.parallelSection( gpuStream );
+      for( auto & ci: layers_ )
+         parallelSection_.run([&]( auto s ) { initialiseCellInterval(pdfs, vel, ci, s); });
+   }
+   
+   /// Returns a functor that runs `calculateMacroscopicParameters(IBlock*)` on the block it is handed.
+   std::function<void (IBlock *)> calculateMacroscopicParameters()
+   {
+      return [this](IBlock* currentBlock) { calculateMacroscopicParameters(currentBlock); };
+   }
+
+   /// Returns a functor running `calculateMacroscopicParameters` on the block region selected by `type`.
+   std::function<void (IBlock *)> calculateMacroscopicParameters(Type type)
+   {
+      // INNER and OUTER have dedicated sweeps; any other value selects the whole block.
+      if (type == Type::INNER)
+         return [this](IBlock* b) { calculateMacroscopicParametersInner(b); };
+
+      if (type == Type::OUTER)
+         return [this](IBlock* b) { calculateMacroscopicParametersOuter(b); };
+
+      return [this](IBlock* b) { calculateMacroscopicParameters(b); };
+   }
+
+   /// Like `calculateMacroscopicParameters(Type)`; `ghost_layers` is only forwarded for the whole block.
+   std::function<void (IBlock *)> calculateMacroscopicParameters(Type type, const cell_idx_t ghost_layers)
+   {
+      // The INNER and OUTER sweeps take no ghost-layer argument, so it is not passed to them.
+      if (type == Type::INNER)
+         return [this](IBlock* b) { calculateMacroscopicParametersInner(b); };
+
+      if (type == Type::OUTER)
+         return [this](IBlock* b) { calculateMacroscopicParametersOuter(b); };
+
+      return [this, ghost_layers](IBlock* b) { calculateMacroscopicParameters(b, ghost_layers); };
+   }
+
+   /// Like `calculateMacroscopicParameters(Type, ghost_layers)`, additionally forwarding `gpuStream`.
+   std::function<void (IBlock *)> calculateMacroscopicParameters(Type type, const cell_idx_t ghost_layers, gpuStream_t gpuStream)
+   {
+      // `ghost_layers` only reaches the whole-block sweep; INNER and OUTER receive the stream alone.
+      if (type == Type::INNER)
+         return [this, gpuStream](IBlock* b) { calculateMacroscopicParametersInner(b, gpuStream); };
+
+      if (type == Type::OUTER)
+         return [this, gpuStream](IBlock* b) { calculateMacroscopicParametersOuter(b, gpuStream); };
+
+      return [this, ghost_layers, gpuStream](IBlock* b) { calculateMacroscopicParameters(b, ghost_layers, gpuStream); };
+   }
+
+   /// Like `calculateMacroscopicParameters(Type)`, additionally forwarding `gpuStream`.
+   std::function<void (IBlock *)> calculateMacroscopicParameters(Type type, gpuStream_t gpuStream)
+   {
+      // Without an explicit ghost-layer count the whole-block sweep is called with zero ghost layers.
+      if (type == Type::INNER)
+         return [this, gpuStream](IBlock* b) { calculateMacroscopicParametersInner(b, gpuStream); };
+
+      if (type == Type::OUTER)
+         return [this, gpuStream](IBlock* b) { calculateMacroscopicParametersOuter(b, gpuStream); };
+
+      return [this, gpuStream](IBlock* b) { calculateMacroscopicParameters(b, cell_idx_c(0), gpuStream); };
+   }
+
+   /// Runs `calculateMacroscopicParameters` on `block` with zero ghost layers and a `nullptr` stream.
+   void calculateMacroscopicParameters(IBlock * block)
+   {
+      // Look up the fields stored on this block under pdfsID and velID.
+      auto * pdfField = block->getData< gpu::GPUField<double> >(pdfsID);
+      auto * velField = block->getData< gpu::GPUField<double> >(velID);
+
+      // Fixed arguments of this overload.
+      const cell_idx_t noGhostLayers = 0;
+      gpuStream_t nullStream = nullptr;
+
+      calculateMacroscopicParameters(pdfField, velField, noGhostLayers, nullStream);
+   }
+
+   /// Runs `calculateMacroscopicParameters` on `block` with `ghost_layers` and a `nullptr` stream.
+   void calculateMacroscopicParameters(IBlock * block, const cell_idx_t ghost_layers)
+   {
+      // Look up the fields stored on this block under pdfsID and velID.
+      auto * pdfField = block->getData< gpu::GPUField<double> >(pdfsID);
+      auto * velField = block->getData< gpu::GPUField<double> >(velID);
+
+      // This overload has no stream parameter, so a nullptr stream is passed on.
+      gpuStream_t nullStream = nullptr;
+
+      calculateMacroscopicParameters(pdfField, velField, ghost_layers, nullStream);
+   }
+
+   /// Runs `calculateMacroscopicParameters` on `block`, passing `ghost_layers` and `gpuStream` through.
+   void calculateMacroscopicParameters(IBlock * block, const cell_idx_t ghost_layers, gpuStream_t gpuStream)
+   {
+      // Look up the fields stored on this block under pdfsID and velID.
+      auto * pdfField = block->getData< gpu::GPUField<double> >(pdfsID);
+      auto * velField = block->getData< gpu::GPUField<double> >(velID);
+
+      // Delegate to the overload that works directly on the fields.
+      calculateMacroscopicParameters(pdfField, velField, ghost_layers, gpuStream);
+   }
+
+   /// Runs `calculateMacroscopicParameters` on `block` restricted to the cell interval `ci`.
+   void calculateMacroscopicParametersCellInterval(IBlock * block, const CellInterval & ci, gpuStream_t gpuStream = nullptr)
+   {
+      // Look up the fields stored on this block under pdfsID and velID.
+      auto * pdfField = block->getData< gpu::GPUField<double> >(pdfsID);
+      auto * velField = block->getData< gpu::GPUField<double> >(velID);
+
+      // Delegate to the overload that works directly on the fields.
+      calculateMacroscopicParametersCellInterval(pdfField, velField, ci, gpuStream);
+   }
+
+   /// Runs `calculateMacroscopicParameters` on the part of `block` left after applying `outerWidth_`.
+   void calculateMacroscopicParametersInner(IBlock * block, gpuStream_t gpuStream = nullptr)
+   {
+      auto * pdfField = block->getData< gpu::GPUField<double> >(pdfsID);
+      auto * velField = block->getData< gpu::GPUField<double> >(velID);
+
+      // Expand by the negated outer width; presumably this shrinks the interval on every axis — confirm.
+      const Cell shrinkBy(-outerWidth_[0], -outerWidth_[1], -outerWidth_[2]);
+      CellInterval interior = pdfField->xyzSize();
+      interior.expand(shrinkBy);
+
+      calculateMacroscopicParametersCellInterval(pdfField, velField, interior, gpuStream);
+   }
+
+   /**
+    * Runs `calculateMacroscopicParameters` on six slices of `block` (T, B, N, S, E, W) whose
+    * widths are taken from `outerWidth_`. One call is issued per slice through a parallel section.
+    *
+    * \param block      block whose pdfs / vel fields are processed
+    * \param gpuStream  stream the parallel section is created from
+    */
+   void calculateMacroscopicParametersOuter(IBlock * block, gpuStream_t gpuStream = nullptr)
+   {
+      auto pdfs = block->getData< gpu::GPUField<double> >(pdfsID);
+      auto vel = block->getData< gpu::GPUField<double> >(velID);
+
+      // The slices are computed only while layers_ is empty and are reused afterwards.
+      if( layers_.empty() )
+      {
+         CellInterval ci;
+
+         // Top and bottom slices are stored as returned.
+         pdfs->getSliceBeforeGhostLayer(stencil::T, ci, outerWidth_[2], false);
+         layers_.push_back(ci);
+         pdfs->getSliceBeforeGhostLayer(stencil::B, ci, outerWidth_[2], false);
+         layers_.push_back(ci);
+
+         // North and south slices are adjusted in z (presumably to avoid overlap with T/B).
+         pdfs->getSliceBeforeGhostLayer(stencil::N, ci, outerWidth_[1], false);
+         ci.expand(Cell(0, 0, -outerWidth_[2]));
+         layers_.push_back(ci);
+         pdfs->getSliceBeforeGhostLayer(stencil::S, ci, outerWidth_[1], false);
+         ci.expand(Cell(0, 0, -outerWidth_[2]));
+         layers_.push_back(ci);
+
+         // East and west slices are adjusted in y and z.
+         pdfs->getSliceBeforeGhostLayer(stencil::E, ci, outerWidth_[0], false);
+         ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+         layers_.push_back(ci);
+         pdfs->getSliceBeforeGhostLayer(stencil::W, ci, outerWidth_[0], false);
+         ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+         layers_.push_back(ci);
+      }
+
+      // Each slice is issued on the stream `s` that the parallel section hands to the
+      // callback. Passing `gpuStream` instead would ignore that stream and leave the
+      // lambda parameter unused.
+      auto parallelSection_ = parallelStreams_.parallelSection( gpuStream );
+      for( auto & ci: layers_ )
+         parallelSection_.run([&]( auto s ) { calculateMacroscopicParametersCellInterval(pdfs, vel, ci, s); });
+   }
+   
+
+   /// Forwards `priority` to `parallelStreams_`, the gpu::ParallelStreams member used by the *Outer sweeps.
+   void setOuterPriority(int priority)
+   {
+      parallelStreams_.setStreamPriority(priority);
+   }
+   
+
+   private:
+      shared_ptr< StructuredBlockStorage > blocks_;
+      BlockDataID pdfsID;   // block data id used to fetch the `pdfs` GPU field
+    BlockDataID velID;      // block data id used to fetch the `vel` GPU field
+    std::vector<double> omegaVector;
+
+    private: std::set< gpu::GPUField<double> *, field::SwapableCompare< gpu::GPUField<double> * > > cache_pdfs_;
+
+      Cell outerWidth_;                   // per-axis width used by the *Inner / *Outer sweeps
+      std::vector<CellInterval> layers_;  // slices for the *Outer sweeps; filled by the first *Outer call
+
+      gpu::ParallelStreams parallelStreams_;  // provides the parallel section used by the *Outer sweeps
+      // std::map<BlockID, gpuStream_t > streams_;
+};
+
+
+} // namespace lbm
+} // namespace walberla
+
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/apps/benchmarks/GridRefSphere/UBB.cu b/apps/benchmarks/GridRefSphere/UBB.cu
new file mode 100644
index 000000000..3debf69e8
--- /dev/null
+++ b/apps/benchmarks/GridRefSphere/UBB.cu
@@ -0,0 +1,146 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file UBB.cu
+//! \author pystencils
+//======================================================================================================================
+
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+#include "UBB.h"
+#include "gpu/ErrorChecking.h"
+
+
+#define FUNC_PREFIX __global__
+
+using namespace std;
+
+namespace walberla {
+namespace lbm {
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 177
+#else
+#pragma diag_suppress 177
+#endif
+#endif
+//NOLINTBEGIN(readability-non-const-parameter*)
+namespace internal_ubb_even {
+static FUNC_PREFIX __launch_bounds__(256) void ubb_even(uint8_t * RESTRICT const _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, double in_v_x, double in_v_y, double in_v_z, int32_t indexVectorSize)
+{
+   // Per index-vector entry (x, y, z, dir): pdfs(x+ox, y+oy, z+oz, inv[dir]) = pdfs(x, y, z, dir) - 6 * weights[dir] * (in_v . c[dir]).
+   const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13 }; 
+   const int32_t f_in_inv_offsets_x [] = { 0,0,0,-1,1,0,0,-1,1,-1,1,0,0,-1,1,0,0,-1,1 }; 
+   const int32_t f_in_inv_offsets_y [] = { 0,1,-1,0,0,0,0,1,1,-1,-1,1,-1,0,0,1,-1,0,0 }; 
+   const int32_t f_in_inv_offsets_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1 }; 
+   // Above: inv[dir] = f_in_inv_dir_idx[dir] and (ox, oy, oz) = f_in_inv_offsets_{x,y,z}[dir].
+   // All tables in this kernel have 19 entries and are indexed by the entry's `dir`.
+   const double weights [] = {0.33333333333333333, 0.055555555555555556, 0.055555555555555556, 0.055555555555555556, 0.055555555555555556, 0.055555555555555556, 0.055555555555555556, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778};
+   // NOTE(review): `dir` is used unchecked as a table index; presumably the host only stores 0..18 — confirm.
+   
+   // c[dir] = neighbour_offset_{x,y,z}[dir]; these tables hold the same values as f_in_inv_offsets_*.
+   const int32_t neighbour_offset_x [] = { 0,0,0,-1,1,0,0,-1,1,-1,1,0,0,-1,1,0,0,-1,1 }; 
+   const int32_t neighbour_offset_y [] = { 0,1,-1,0,0,0,0,1,1,-1,-1,1,-1,0,0,1,-1,0,0 }; 
+   const int32_t neighbour_offset_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1 }; 
+   // One thread per entry; entries are 16 bytes with x, y, z, dir read as int32 at byte offsets 0, 4, 8, 12.
+   if (blockDim.x*blockIdx.x + threadIdx.x < indexVectorSize)
+   {
+      uint8_t * RESTRICT _data_indexVector_10 = _data_indexVector;
+      const int64_t x = *((int32_t * )(& _data_indexVector_10[16*blockDim.x*blockIdx.x + 16*threadIdx.x]));
+      uint8_t * RESTRICT _data_indexVector_14 = _data_indexVector + 4;
+      const int64_t y = *((int32_t * )(& _data_indexVector_14[16*blockDim.x*blockIdx.x + 16*threadIdx.x]));
+      uint8_t * RESTRICT _data_indexVector_18 = _data_indexVector + 8;
+      const int64_t z = *((int32_t * )(& _data_indexVector_18[16*blockDim.x*blockIdx.x + 16*threadIdx.x]));
+      uint8_t * RESTRICT _data_indexVector_112 = _data_indexVector + 12;
+      const int32_t dir = *((int32_t * )(& _data_indexVector_112[16*blockDim.x*blockIdx.x + 16*threadIdx.x]));
+      double * RESTRICT  _data_pdfs700fc22ba4e33a75 = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir];
+      double * RESTRICT  _data_pdfs_10_204e9ebfcded49d05e = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir;
+      _data_pdfs700fc22ba4e33a75[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir]] = (in_v_x*6.0*((double)(neighbour_offset_x[dir])) + in_v_y*6.0*((double)(neighbour_offset_y[dir])) + in_v_z*6.0*((double)(neighbour_offset_z[dir])))*-1.0*weights[dir] + _data_pdfs_10_204e9ebfcded49d05e[_stride_pdfs_0*x];
+   } 
+}
+}
+
+//NOLINTEND(readability-non-const-parameter*)
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __CUDACC__
+#pragma pop
+#endif
+
+
+/**
+ * Applies the UBB treatment on one block by launching ubb_even over the device
+ * index vector selected by `type`. Returns without a launch if that vector is empty.
+ *
+ * \param block   block holding the index vectors and the PDF field
+ * \param type    index vector to process (ALL, INNER or OUTER)
+ * \param stream  GPU stream the kernel is launched on
+ */
+void UBB::run_impl(IBlock * block, IndexVectors::Type type, gpuStream_t stream)
+{
+   auto * indexVectors = block->getData<IndexVectors>(indexVectorID);
+   const int32_t indexVectorSize = int32_c( indexVectors->indexVector(type).size() );
+   if( indexVectorSize == 0)
+      return;
+
+   // Device copy of the index vector; the kernel addresses it byte-wise.
+   uint8_t * _data_indexVector = reinterpret_cast<uint8_t*>(indexVectors->pointerGpu(type));
+
+   auto pdfs = block->getData< gpu::GPUField<double> >(pdfsID);
+   WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()))
+   double * RESTRICT  _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+
+   // At most 256 threads per block (the kernel's __launch_bounds__), and enough blocks to cover all entries.
+   const int32_t threadsPerBlock = (256 < indexVectorSize) ? 256 : indexVectorSize;
+   const int64_t numBlocks = (int64_t(indexVectorSize) + int64_t(threadsPerBlock) - 1) / int64_t(threadsPerBlock);
+   const dim3 _block(uint32_c(threadsPerBlock), uint32_c(1), uint32_c(1));
+   const dim3 _grid(uint32_c(numBlocks), uint32_c(1), uint32_c(1));
+   // ubb_even is the kernel for every timestep parity, so no even/odd dispatch is required.
+   internal_ubb_even::ubb_even<<<_grid, _block, 0, stream>>>(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, in_v_x_, in_v_y_, in_v_z_, indexVectorSize);
+}
+// Processes the ALL index vector of `block` on `stream`.
+void UBB::run(IBlock * block, gpuStream_t stream)
+{
+   run_impl(block, IndexVectors::ALL, stream);
+}
+// Processes only the INNER index vector of `block` on `stream`.
+void UBB::inner(IBlock * block, gpuStream_t stream)
+{
+   run_impl(block, IndexVectors::INNER, stream);
+}
+// Processes only the OUTER index vector of `block` on `stream`.
+void UBB::outer(IBlock * block, gpuStream_t stream)
+{
+   run_impl(block, IndexVectors::OUTER, stream);
+}
+
+} // namespace lbm
+} // namespace walberla
+
diff --git a/apps/benchmarks/GridRefSphere/UBB.h b/apps/benchmarks/GridRefSphere/UBB.h
new file mode 100644
index 000000000..b4e0a5f43
--- /dev/null
+++ b/apps/benchmarks/GridRefSphere/UBB.h
@@ -0,0 +1,530 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file UBB.h
+//! \author pystencils
+//======================================================================================================================
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "gpu/FieldCopy.h"
+#include "gpu/GPUField.h"
+#include "gpu/GPUWrapper.h"
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "blockforest/StructuredBlockForest.h"
+#include "field/FlagField.h"
+#include "core/debug/Debug.h"
+
+#include <set>
+#include <vector>
+
+
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace lbm {
+
+
+class UBB
+{
+public:
+    /// One index-vector entry: a cell (x, y, z) and a direction index, stored as four int32 values.
+    struct IndexInfo {
+        int32_t x;
+        int32_t y;
+        int32_t z;
+        int32_t dir;
+        IndexInfo(int32_t xIdx, int32_t yIdx, int32_t zIdx, int32_t dirIdx) : x(xIdx), y(yIdx), z(zIdx), dir(dirIdx) {}
+        /// Member-wise equality.
+        bool operator==(const IndexInfo & rhs) const { return x == rhs.x && y == rhs.y && z == rhs.z && dir == rhs.dir; }
+    };
+
+
+
+    /// Index vectors (ALL / INNER / OUTER) of one block on the host, plus their device copies.
+    class IndexVectors
+    {
+    public:
+        using CpuIndexVector = std::vector<IndexInfo>;
+        enum Type {  // the values double as indices into cpuVectors_ / gpuVectors_
+            ALL = 0,
+            INNER = 1,
+            OUTER = 2,
+            NUM_TYPES = 3
+        };
+
+        IndexVectors() = default;
+        bool operator==(IndexVectors const &other) const { return other.cpuVectors_ == cpuVectors_; }
+        // NOTE(review): copying would duplicate the raw device pointers and free them twice; do not copy instances.
+        ~IndexVectors() {
+            for( auto & gpuVec: gpuVectors_)
+               WALBERLA_GPU_CHECK(gpuFree( gpuVec ));
+        }
+        CpuIndexVector & indexVector(Type t) { return cpuVectors_[t]; }
+        IndexInfo * pointerCpu(Type t)  { return cpuVectors_[t].data(); }
+        /// Device pointer for `t`; gpuVectors_ is empty until syncGPU() has been called.
+        IndexInfo * pointerGpu(Type t)  { return gpuVectors_[t]; }
+        /// Frees the previous device copies, then allocates and uploads the current host vectors.
+        void syncGPU()
+        {
+            for( auto & gpuVec: gpuVectors_)
+               WALBERLA_GPU_CHECK(gpuFree( gpuVec ));
+            gpuVectors_.resize( cpuVectors_.size() );
+            WALBERLA_ASSERT_EQUAL(cpuVectors_.size(), NUM_TYPES);
+            for(size_t i=0; i < cpuVectors_.size(); ++i )
+            {
+                auto & gpuVec = gpuVectors_[i];
+                auto & cpuVec = cpuVectors_[i];
+                WALBERLA_GPU_CHECK(gpuMalloc( &gpuVec, sizeof(IndexInfo) * cpuVec.size() ));
+                // data() instead of &cpuVec[0]: indexing an empty vector is undefined behaviour.
+                WALBERLA_GPU_CHECK(gpuMemcpy( gpuVec, cpuVec.data(), sizeof(IndexInfo) * cpuVec.size(), gpuMemcpyHostToDevice ));
+            }
+        }
+
+    private:
+        std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
+        using GpuIndexVector = IndexInfo *;
+        std::vector<GpuIndexVector> gpuVectors_;
+    };
+
+    /// Stores the PDF field id and the in_v_x/y/z values, and registers an IndexVectors object as block data.
+    UBB( const shared_ptr<StructuredBlockForest> & blocks,
+                   BlockDataID pdfsID_, double in_v_x, double in_v_y, double in_v_z)
+        : pdfsID(pdfsID_), in_v_x_(in_v_x), in_v_y_(in_v_y), in_v_z_(in_v_z)
+    {
+        indexVectorID = blocks->addStructuredBlockData< IndexVectors >( []( IBlock * const , StructuredBlockStorage * const ) { return new IndexVectors(); }, "IndexField_UBB");
+    }
+
+    void run (IBlock * block, gpuStream_t stream = nullptr);   ///< processes the ALL index vector (defined in UBB.cu)
+    /// Function-call syntax for run().
+    void operator() (IBlock * block, gpuStream_t stream = nullptr)
+    {
+        run(block, stream);
+    }
+    /// Processes only the INNER index vector (defined in UBB.cu).
+    void inner (IBlock * block, gpuStream_t stream = nullptr);
+    /// Processes only the OUTER index vector (defined in UBB.cu).
+    void outer (IBlock * block, gpuStream_t stream = nullptr);
+
+    /// Returns a functor that calls run() on the block it is handed, with `stream` bound.
+    std::function<void (IBlock *)> getSweep(gpuStream_t stream = nullptr)
+    {
+        auto sweep = [this, stream](IBlock * blk) { run(blk, stream); };
+        return sweep;
+    }
+
+    /// Returns a functor that calls inner() on the block it is handed, with `stream` bound.
+    std::function<void (IBlock *)> getInnerSweep(gpuStream_t stream = nullptr)
+    {
+        auto sweep = [this, stream](IBlock * blk) { inner(blk, stream); };
+        return sweep;
+    }
+
+    /// Returns a functor that calls outer() on the block it is handed, with `stream` bound.
+    std::function<void (IBlock *)> getOuterSweep(gpuStream_t stream = nullptr)
+    {
+        auto sweep = [this, stream](IBlock * blk) { outer(blk, stream); };
+        return sweep;
+    }
+
+    /// Calls the per-block fillFromFlagField overload for every block in `blocks`.
+    template<typename FlagField_T>
+    void fillFromFlagField( const shared_ptr<StructuredBlockForest> & blocks, ConstBlockDataID flagFieldID,
+                            FlagUID boundaryFlagUID, FlagUID domainFlagUID)
+    {
+        for( auto & currentBlock : *blocks ) fillFromFlagField<FlagField_T>(&currentBlock, flagFieldID, boundaryFlagUID, domainFlagUID );
+    }
+
+
+    template<typename FlagField_T>
+    void fillFromFlagField(IBlock * block, ConstBlockDataID flagFieldID,
+                            FlagUID boundaryFlagUID, FlagUID domainFlagUID )
+    {
+        auto * indexVectors = block->getData< IndexVectors > ( indexVectorID );
+        auto & indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+        auto & indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+        auto & indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+        auto * flagField = block->getData< FlagField_T > ( flagFieldID );
+        
+
+        if( !(flagField->flagExists(boundaryFlagUID) && flagField->flagExists(domainFlagUID) ))
+            return;
+
+        auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
+        auto domainFlag = flagField->getFlag(domainFlagUID);
+
+        auto inner = flagField->xyzSize();
+        inner.expand( cell_idx_t(-1) );
+
+        indexVectorAll.clear();
+        indexVectorInner.clear();
+        indexVectorOuter.clear();
+
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 0, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  0 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  1 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, -1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  2 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 0, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  3 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 0, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  4 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 0, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  5 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 0, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  6 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  7 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  8 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, -1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  9 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, -1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  10 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 1, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  11 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, -1, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  12 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 0, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  13 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 0, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  14 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 1, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  15 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, -1, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  16 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 0, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  17 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 0, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  18 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        
+        
+
+        indexVectors->syncGPU();
+    }
+
+private:
+    void run_impl(IBlock * block, IndexVectors::Type type, gpuStream_t stream = nullptr); // NOTE(review): body is not part of this header section; presumably handles the index vector selected by `type` on `stream` - confirm
+
+    BlockDataID indexVectorID; // NOTE(review): presumably the block-data handle of the per-block index vectors - confirm
+    
+public:
+    BlockDataID pdfsID; // NOTE(review): presumably the block-data handle of the PDF field - confirm
+    double in_v_x_; // NOTE(review): presumably the value for the generator symbol in_v_x (x velocity component) - confirm
+    double in_v_y_; // NOTE(review): presumably the value for the generator symbol in_v_y (y velocity component) - confirm
+    double in_v_z_; // NOTE(review): presumably the value for the generator symbol in_v_z (z velocity component) - confirm
+};
+
+
+
+} // namespace lbm
+} // namespace walberla
\ No newline at end of file
diff --git a/apps/benchmarks/GridRefSphere/gridRef_SPHERE.py b/apps/benchmarks/GridRefSphere/gridRef_SPHERE.py
new file mode 100644
index 000000000..7cd3caa00
--- /dev/null
+++ b/apps/benchmarks/GridRefSphere/gridRef_SPHERE.py
@@ -0,0 +1,64 @@
+import sympy as sp
+import pystencils as ps
+from lbmpy import LBMConfig, LBMOptimisation, LBStencil, Method, Stencil
+from lbmpy.creationfunctions import create_lb_method,create_lb_collision_rule
+from pystencils_walberla import CodeGeneration, generate_sweep, generate_pack_info_from_kernel
+from lbmpy_walberla import generate_lbm_package, generate_lbm_storage_specification,RefinementScaling,generate_lbm_sweep_collection,lbm_boundary_generator,generate_boundary_collection
+from lbmpy.boundaries import NoSlip,ExtrapolationOutflow,FreeSlip,UBB
+#
+#
+#
+#
+#
+#--------- Symbolic parameters ---------#
+#
+#
+#
+inlet_tuple = sp.symbols("in_v_x in_v_y in_v_z")  #Symbols for the u,v,w components of the free-stream velocity (handed to the UBB boundary)
+omega       = sp.Symbol("omega")                  #Symbol for the SRT relaxation frequency
+stencil     = LBStencil("D3Q19")                  #Chosen stencil to sample the velocity space
+layout      = "fzyx"                              #Layout for storing the fields in memory
+data_type   = "float64"                           #Datatype of the data in the fields (here double precision is assumed)
+target      = ps.Target.GPU                       #Defines whether the generated code must be for GPU or CPU
+streaming_pattern = 'pull'                        #Name of the streaming pattern expected by the lbmpy generators
+#
+#
+#
+#---------------------------------------#
+#
+#
+#
+with CodeGeneration() as ctx:
+    vel = ps.fields(f"vel(3) : {data_type}[{stencil.D}D]",layout=layout) 
+    output = {"velocity":vel}
+    pdfs     = ps.fields(f"pdfs({stencil.Q}) : {data_type}[{stencil.D}D]",layout=layout)
+    pdfs_tmp = ps.fields(f"pdfs_tmp({stencil.Q}) : {data_type}[{stencil.D}D]",layout=layout)
+    #
+    lbm_config = LBMConfig(stencil=stencil,method=Method.SRT,relaxation_rate=omega,zero_centered=False,equilibrium_order=2,continuous_equilibrium=False,output=output)
+    lbm_opt = LBMOptimisation(cse_pdfs=False,cse_global=False,simplification=False,field_layout=layout,symbolic_field=pdfs,symbolic_temporary_field=pdfs_tmp)
+    method = create_lb_method(lbm_config)
+    collision_rule = create_lb_collision_rule(lbm_config=lbm_config,lbm_optimisation=lbm_opt)
+    #
+    #
+    noslip   = lbm_boundary_generator("NoSlip",flag_uid="NoSlip",boundary_object=NoSlip())
+    outflow  = lbm_boundary_generator("Outflow",flag_uid="Outflow",boundary_object=ExtrapolationOutflow((1,0,0),method))
+    freeslip = lbm_boundary_generator("FreeSlip",flag_uid="FreeSlip",boundary_object=FreeSlip(stencil))
+    ubb      = lbm_boundary_generator("UBB",flag_uid="UBB",boundary_object=UBB(inlet_tuple,data_type=data_type))
+    generate_lbm_package(ctx, name="",
+                         collision_rule=collision_rule,
+                         lbm_config=lbm_config, lbm_optimisation=lbm_opt,
+                         nonuniform=True, boundaries=[noslip, outflow, freeslip, ubb],
+                         macroscopic_fields=output,
+                         target=ps.Target.GPU)
+
+    #REFINEMENT_SCALING = RefinementScaling()
+    #REFINEMENT_SCALING.add_standard_relaxation_rate_scaling(omega)
+    #generate_lbm_storage_specification(ctx,"StorageSpecification",method,lbm_config,nonuniform=True,target=target,data_type=data_type)
+    #generate_lbm_sweep_collection(ctx,"Sweeps",collision_rule=collision_rule,streaming_pattern=streaming_pattern,field_layout = layout,refinement_scaling = REFINEMENT_SCALING,macroscopic_fields = output,target = target,data_type = data_type)
+    #
+#    noslip   = lbm_boundary_generator("NoSlip",flag_uid="NoSlip",boundary_object=NoSlip())
+#    outflow  = lbm_boundary_generator("Outflow",flag_uid="Outflow",boundary_object=ExtrapolationOutflow((1,0,0),method))
+#    freeslip = lbm_boundary_generator("FreeSlip",flag_uid="FreeSlip",boundary_object=FreeSlip(stencil))
+#    ubb      = lbm_boundary_generator("UBB",flag_uid="UBB",boundary_object=UBB(inlet_tuple,data_type=data_type))
+    #
+#    generate_boundary_collection(ctx,"BoundaryCollection",boundary_generators = [noslip,outflow,freeslip,ubb],lb_method = method, field_name='pdfs',streaming_pattern=streaming_pattern,target=target, layout=layout)
diff --git a/apps/benchmarks/GridRefSphere/refGrid_SPHERE.cpp b/apps/benchmarks/GridRefSphere/refGrid_SPHERE.cpp
new file mode 100644
index 000000000..a5f2bddfb
--- /dev/null
+++ b/apps/benchmarks/GridRefSphere/refGrid_SPHERE.cpp
@@ -0,0 +1,235 @@
+#include "blockforest/all.h"
+#include "core/all.h"
+#include "domain_decomposition/all.h"
+#include "field/all.h"
+#include "geometry/all.h"
+#include "timeloop/all.h"
+#include <iostream>
+#include <cmath>
+
+#include "mesh_common/DistanceComputations.h"
+#include "mesh_common/DistanceFunction.h"
+#include "mesh_common/MatrixVectorOperations.h"
+#include "mesh_common/MeshIO.h"
+#include "mesh_common/MeshOperations.h"
+#include "mesh_common/TriangleMeshes.h"
+#include "mesh_common/distance_octree/DistanceOctree.h"
+#include "mesh_common/vtk/CommonDataSources.h"
+#include "mesh_common/vtk/VTKMeshWriter.h"
+#include "mesh/blockforest/BlockExclusion.h"
+#include "mesh/blockforest/BlockForestInitialization.h"
+#include "mesh/blockforest/BlockWorkloadMemory.h"
+#include "mesh/blockforest/RefinementSelection.h"
+#include "mesh/boundary/BoundaryInfo.h"
+#include "mesh/boundary/BoundaryLocation.h"
+#include "mesh/boundary/BoundaryLocationFunction.h"
+#include "mesh/boundary/BoundarySetup.h"
+#include "mesh/boundary/BoundaryUIDFaceDataSource.h"
+#include "mesh/boundary/ColorToBoundaryMapper.h"
+
+#include "gpu/AddGPUFieldToStorage.h"
+#include "gpu/DeviceSelectMPI.h"
+#include "gpu/HostFieldAllocator.h"
+#include "gpu/ParallelStreams.h"
+#include "gpu/communication/GPUPackInfo.h"
+#include "gpu/communication/NonUniformGPUScheme.h"
+#include "gpu/FieldCopy.h"
+
+#include "field/AddToStorage.h"
+#include "field/FlagField.h"
+#include "field/vtk/VTKWriter.h"
+#include "lbm_generated/evaluation/PerformanceEvaluation.h"
+#include "lbm_generated/field/PdfField.h"
+#include "lbm_generated/field/AddToStorage.h"
+#include "lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.h"
+#include "lbm_generated/gpu/GPUPdfField.h"
+#include "lbm_generated/gpu/AddToStorage.h"
+#include "lbm_generated/gpu/BasicRecursiveTimeStepGPU.h"
+
+#include "BoundaryCollection.h"  
+#include "FreeSlip.h"
+#include "NoSlip.h"
+#include "Outflow.h"
+#include "UBB.h"
+#include "StorageSpecification.h"
+#include "SweepCollection.h"
+
+namespace walberla{
+  //	
+  using StorageSpecification_T = lbm::StorageSpecification;                          // generated storage specification
+  using Stencil_T              = StorageSpecification_T::Stencil;
+  using PdfField_T             = lbm_generated::PdfField<StorageSpecification_T>;    // host-side PDF field type
+  using GPUPdfField_T          = lbm_generated::GPUPdfField<StorageSpecification_T>; // device-side PDF field type
+  using PackInfo_T             = lbm_generated::NonuniformGeneratedGPUPdfPackInfo<GPUPdfField_T>;
+  using VectorField_T          = field::GhostLayerField<real_t, Stencil_T::D>;       // Stencil_T::D values per cell (used for the velocity)
+  using flag_t                 = walberla::uint8_t;
+  using real_t                 = double;                                             // NOTE(review): real_t is already used one alias earlier, so this must match the framework's definition
+  using FlagField_T            = FlagField<flag_t>;
+  using UBB_T                  = lbm::UBB;                                           // generated boundary classes
+  using Outflow_T              = lbm::Outflow;
+  using FreeSlip_T             = lbm::FreeSlip;
+  using NoSlip_T               = lbm::NoSlip;
+  const FlagUID Fluid_UID("Fluid");       // flag names; the same strings appear as flag_uid in the Python generator script
+  const FlagUID NoSlip_UID("NoSlip");
+  const FlagUID FreeSlip_UID("FreeSlip");
+  const FlagUID Outflow_UID("Outflow");
+  const FlagUID UBB_UID("UBB");
+  uint_t numGhostLayers = 2;              // ghost layers requested for every field created in main()
+
+  int main(int argc, char** argv){
+      // Sets up and runs the grid-refined GPU LBM simulation of the flow around the mesh named by "meshFile".
+      // All inputs are read from the config blocks "Parameters", "DomainSetup" and "Boundaries".
+      walberla::Environment walberlaEnv( argc, argv );
+      mpi::MPIManager::instance()->useWorldComm();
+      gpu::selectDeviceBasedOnMpiRank();
+      //
+      auto parameters = walberlaEnv.config()->getOneBlock("Parameters");
+      auto domain     = walberlaEnv.config()->getOneBlock("DomainSetup");
+      auto boundariesConfig = walberlaEnv.config()->getOneBlock("Boundaries");
+      //
+      const real_t Re = parameters.getParameter<real_t>("Re",real_c(1000)); //Reynolds number
+      const real_t maxPhysT = parameters.getParameter<real_t>("maxPhysT",real_c(1.0)); //Total simulation time, seconds
+      const real_t uLB = parameters.getParameter< real_t >("uLB",real_c(0.01)); //Freestream velocity in lattice units
+      const real_t remainingTimeLoggerFrequency = parameters.getParameter< real_t >("remainingTimeLoggerFrequency",real_c(3));
+      // dx0, dy0, dz0 and dt0 default to 0.0, i.e. the config has to provide them (validated below).
+      const real_t dx0 = domain.getParameter<real_t>("dx0",real_t(0.0));
+      const real_t dy0 = domain.getParameter<real_t>("dy0",real_t(0.0));
+      const real_t dz0 = domain.getParameter<real_t>("dz0",real_t(0.0));
+      const real_t dt0 = domain.getParameter<real_t>("dt0",real_t(0.0));
+      const uint_t numLevels = domain.getParameter<uint_t>("numLevels",uint_t(1));
+      const Vector3<uint_t> cellsPerBlock = domain.getParameter<Vector3<uint_t>>("cellsPerBlock",Vector3<uint_t>());
+      const Vector3<uint_t> numblocks     = domain.getParameter<Vector3<uint_t>>("numblocks",Vector3<uint_t>());
+      const Vector3<real_t> domainScaling = domain.getParameter<Vector3<real_t>>("domainScaling", Vector3< real_t >(1.0));
+      const std::string meshFile = domain.getParameter<std::string>("meshFile");
+      const real_t diameter = domain.getParameter<real_t>("diameter");
+      const Vector3< bool > periodicity = domain.getParameter< Vector3< bool > >("periodic", Vector3< bool >(false));
+      // Fail early with a clear message: these values are used as divisors or must be strictly positive.
+      WALBERLA_CHECK_GREATER(dx0, real_c(0), "DomainSetup: dx0 must be > 0, got " << dx0);
+      WALBERLA_CHECK_GREATER(dy0, real_c(0), "DomainSetup: dy0 must be > 0, got " << dy0);
+      WALBERLA_CHECK_GREATER(dz0, real_c(0), "DomainSetup: dz0 must be > 0, got " << dz0);
+      WALBERLA_CHECK_GREATER(dt0, real_c(0), "DomainSetup: dt0 must be > 0, got " << dt0);
+      WALBERLA_CHECK_GREATER(Re, real_c(0), "Parameters: Re must be > 0, got " << Re);
+      WALBERLA_CHECK_GREATER(diameter, real_c(0), "DomainSetup: diameter must be > 0, got " << diameter);
+      WALBERLA_CHECK_GREATER(numLevels, uint_t(0), "DomainSetup: numLevels must be >= 1 (numLevels-1 is used below)");
+      //
+      const real_t viscosity0 = (diameter/dx0) * uLB / Re;
+      const real_t omega0 = 1.0/(3.0*viscosity0 + 1.0/2.0);
+      const Vector3<real_t> inletVelocity(uLB, real_c(0), real_c(0));
+      const uint_t timesteps = uint_c(maxPhysT/dt0);
+      // VTK write frequency is timesteps - 2; it stays 0 (output skipped further down) when fewer than three steps run.
+      const uint_t VTKwriteFrequency = (timesteps > uint_t(2)) ? (timesteps - uint_t(2)) : uint_t(0);
+      /*
+      WALBERLA_LOG_INFO_ON_ROOT("Re =  " << Re);
+      WALBERLA_LOG_INFO_ON_ROOT("uLB =  " << uLB);
+      WALBERLA_LOG_INFO_ON_ROOT("timesteps =  " << timesteps);
+      WALBERLA_LOG_INFO_ON_ROOT("viscosity0 =  " << viscosity0);
+      WALBERLA_LOG_INFO_ON_ROOT("omega0 =  " << omega0 );
+      WALBERLA_LOG_INFO_ON_ROOT("dx0 = " << dx0);
+      WALBERLA_LOG_INFO_ON_ROOT("cell per block<x> = " << cellsPerBlock[0])
+      WALBERLA_LOG_INFO_ON_ROOT("cell per block<y> = " << cellsPerBlock[1])
+      WALBERLA_LOG_INFO_ON_ROOT("cell per block<z> = " << cellsPerBlock[2])    
+      */
+      //
+      auto mesh = make_shared< mesh::TriangleMesh >();
+      mesh->request_vertex_colors();
+      mesh::readAndBroadcast(meshFile, *mesh);
+      //
+      auto triDist = make_shared< mesh::TriangleDistance< mesh::TriangleMesh > >(mesh);
+      auto distanceOctree = make_shared< mesh::DistanceOctree< mesh::TriangleMesh > >(triDist);
+      WALBERLA_ROOT_SECTION(){distanceOctree->writeVTKOutput("distanceOctree");}
+      // Domain box of size domainScaling*diameter, shifted so that its minimum corner is at (mesh centre - delta).
+      const Vector3<real_t> delta(3.0*diameter, domainScaling[1]*diameter/2.0, domainScaling[2]*diameter/2.0);
+      const Vector3<real_t> Lbox(domainScaling[0]*diameter, domainScaling[1]*diameter, domainScaling[2]*diameter);
+      auto aabb_mesh = computeAABB(*mesh);
+      auto center = aabb_mesh.center();
+      auto aabb = math::GenericAABB(center[0] - Lbox[0]/2.0, center[1] - Lbox[1]/2.0, center[2] - Lbox[2]/2.0,
+                                    center[0] + Lbox[0]/2.0, center[1] + Lbox[1]/2.0, center[2] + Lbox[2]/2.0);
+      Vector3<real_t> translation(center[0] - delta[0] - aabb.xMin(), center[1] - delta[1] - aabb.yMin(), center[2] - delta[2] - aabb.zMin());
+      aabb.translate(translation);
+      Vector3<real_t> sampling(dx0, dy0, dz0);
+      mesh::ComplexGeometryStructuredBlockforestCreator bfc(aabb,sampling);
+      bfc.setPeriodicity(periodicity);
+      auto RBE = mesh::makeExcludeMeshInterior(distanceOctree,dx0);
+      auto RS  = mesh::makeRefinementSelection(distanceOctree,numLevels-1,dx0,dx0);
+      auto BE  = mesh::makeExcludeMeshInteriorRefinement(distanceOctree,dx0/std::pow(2.0,static_cast<double>(numLevels)));
+      bfc.setRootBlockExclusionFunction(RBE);
+      bfc.setRefinementSelectionFunction(RS);
+      bfc.setBlockExclusionFunction(BE);
+      auto setupForest = bfc.createSetupBlockForest(cellsPerBlock,numblocks);
+      WALBERLA_ROOT_SECTION(){setupForest->writeVTKOutput("SetUpForest");}
+      auto blocks = bfc.createStructuredBlockForest(cellsPerBlock,numblocks);
+      //
+      const StorageSpecification_T StorageSpec = StorageSpecification_T();
+      auto allocator = make_shared< gpu::HostFieldAllocator<real_t> >();
+      BlockDataID pdfFieldId_CPU  = lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, numGhostLayers, field::fzyx, allocator);
+      BlockDataID velocityFieldId_CPU = field::addToStorage< VectorField_T >(blocks, "velocity", real_c(0.0), field::fzyx,numGhostLayers);
+      BlockDataID flagFieldId     = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field",numGhostLayers);
+      //
+      BlockDataID pdfFieldId_GPU  = lbm_generated::addGPUPdfFieldToStorage<PdfField_T>(blocks,pdfFieldId_CPU,StorageSpec,"pdfGPU");
+      BlockDataID velocityFieldId_GPU=gpu::addGPUFieldToStorage< VectorField_T >(blocks, velocityFieldId_CPU, "velocity on GPU", true);
+      //
+      lbm::SweepCollection sweeps(blocks,pdfFieldId_GPU,velocityFieldId_GPU,omega0);
+      for (auto& iBlock : *blocks){sweeps.initialise(&iBlock,numGhostLayers);}
+      WALBERLA_MPI_BARRIER()
+      // Flag masks read by the VTK clean-up lambda; zero-initialised so they are defined even if this process owns no block.
+      flag_t flag_no_slip_ = flag_t(0);
+      flag_t flag_fluid_   = flag_t(0);
+      for (auto& iBlock : *blocks){
+          auto tmp = iBlock.getData<FlagField_T>(flagFieldId);
+          flag_no_slip_ = tmp->getOrRegisterFlag(NoSlip_UID);
+          flag_fluid_ = tmp->getOrRegisterFlag(Fluid_UID);
+      }
+      // -- Boundary conditions -- //
+      mesh::BoundarySetup boundarySetup(blocks, makeMeshDistanceFunction(distanceOctree), numGhostLayers);
+      boundarySetup.setFlag<FlagField_T>(flagFieldId,NoSlip_UID,mesh::BoundarySetup::INSIDE);
+      geometry::initBoundaryHandling< FlagField_T >(*blocks, flagFieldId, boundariesConfig);
+      geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldId, Fluid_UID);
+      lbm::BoundaryCollection<FlagField_T> BCs(blocks,flagFieldId,pdfFieldId_CPU,pdfFieldId_GPU,Fluid_UID,inletVelocity[0],inletVelocity[1],inletVelocity[2]);
+      //
+      shared_ptr<gpu::communication::NonUniformGPUScheme<Stencil_T>> com_ptr = make_shared<gpu::communication::NonUniformGPUScheme<Stencil_T>>(blocks,false);
+      shared_ptr<PackInfo_T> packInfo = lbm_generated::setupNonuniformGPUPdfCommunication<GPUPdfField_T>(blocks,pdfFieldId_GPU);
+      com_ptr->addPackInfo(packInfo);
+      WALBERLA_MPI_BARRIER()
+      //
+      // Query the device's stream priority range and hand the high-priority value to the sweep collection.
+      int streamHighPriority = 0;
+      int streamLowPriority  = 0;
+      WALBERLA_GPU_CHECK(gpuDeviceGetStreamPriorityRange(&streamLowPriority, &streamHighPriority))
+      sweeps.setOuterPriority(streamHighPriority);
+      //
+      lbm_generated::BasicRecursiveTimeStepGPU<GPUPdfField_T,lbm::SweepCollection,lbm::BoundaryCollection<FlagField_T>> MR_lbm(blocks,pdfFieldId_GPU,sweeps,BCs,com_ptr,packInfo);
+      SweepTimeloop timeloop(blocks->getBlockStorage(),timesteps);
+      MR_lbm.addRefinementToTimeLoop(timeloop);
+      timeloop.addFuncAfterTimeStep(timing::RemainingTimeLogger(timeloop.getNrOfTimeSteps(), remainingTimeLoggerFrequency),"remaining time logger");
+      // Zeroes the host velocity in cells flagged NoSlip but not Fluid (the obstacle cells); captures by reference, used only inside this scope.
+      auto set2zero_Vel = [&](){
+              for (auto& block : *blocks){
+                 auto VEL = block.getData<VectorField_T>(velocityFieldId_CPU);
+                 auto flagField = block.getData<FlagField_T>(flagFieldId);
+                 for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it ){
+                   if( isFlagSet(it, flag_no_slip_) && !isFlagSet(it,flag_fluid_)){
+                     VEL->get(it.x(),it.y(),it.z(),0) = 0.0;
+                     VEL->get(it.x(),it.y(),it.z(),1) = 0.0;
+                     VEL->get(it.x(),it.y(),it.z(),2) = 0.0;
+                   }//end if
+                 }//end for it
+              }//end for blk
+      };//end set2zero_Vel
+      //
+      // VTK output: before each write the velocity is copied from the GPU field to the host field and cleaned up.
+      if (VTKwriteFrequency > 0)
+      {
+         const std::string path = "Result_Sphere";
+         auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "velocity_field", VTKwriteFrequency, 0,false, path, "simulation_step", false, true, true, false, 0);
+         vtkOutput->addBeforeFunction([&](){gpu::fieldCpy<VectorField_T,gpu::GPUField<real_t>>(blocks,velocityFieldId_CPU,velocityFieldId_GPU);});
+         vtkOutput->addBeforeFunction(set2zero_Vel);
+         auto velWriter = make_shared< field::VTKWriter< VectorField_T > >(velocityFieldId_CPU, "Velocity");
+         vtkOutput->addCellDataWriter(velWriter);
+         timeloop.addFuncBeforeTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
+      }//end if VTKwriteFrequency
+      timeloop.run();
+      return EXIT_SUCCESS;
+  }//main
+} //namespace walberla
+
+int main( int argc, char ** argv ){ return walberla::main(argc, argv); } // forward walberla::main's status as the process exit code
diff --git a/apps/benchmarks/GridRefSphere/sphere.obj b/apps/benchmarks/GridRefSphere/sphere.obj
new file mode 100644
index 000000000..034d95add
--- /dev/null
+++ b/apps/benchmarks/GridRefSphere/sphere.obj
@@ -0,0 +1,218 @@
+####
+#
+# OBJ File Generated by Meshlab
+#
+####
+# Object sphere.obj
+#
+# Vertices: 52
+# Faces: 100
+#
+####
+vn 3.203486 0.106207 4.556782
+v 0.589717 0.013032 0.847959 0.752941 0.752941 0.752941
+vn 5.019849 1.775524 1.684732
+v 0.919162 0.323653 0.317881 0.752941 0.752941 0.752941
+vn 2.435736 2.603171 4.338618
+v 0.454446 0.492016 0.779993 0.752941 0.752941 0.752941
+vn 1.166519 4.432512 3.718873
+v 0.245629 0.761303 0.617920 0.752941 0.752941 0.752941
+vn 3.325176 4.334585 2.054782
+v 0.568591 0.771892 0.343297 0.752941 0.752941 0.752941
+vn 0.595211 5.541665 1.103712
+v 0.110726 0.991360 0.237197 0.752941 0.752941 0.752941
+vn -3.776296 0.767626 4.266579
+v -0.688569 0.139508 0.738187 0.000000 1.000000 0.000000
+vn -1.312800 2.808851 4.706018
+v -0.270132 0.525142 0.837286 0.000000 1.000000 0.000000
+vn -3.673248 3.342919 3.246579
+v -0.616965 0.568913 0.559955 0.000000 1.000000 0.000000
+vn -5.184217 1.765921 1.597865
+v -0.913888 0.321159 0.322155 0.000000 1.000000 0.000000
+vn -1.576481 4.921260 2.246386
+v -0.288050 0.891505 0.427122 0.000000 1.000000 0.000000
+vn -3.631493 4.235586 0.210513
+v -0.677783 0.771802 0.054808 0.000000 1.000000 0.000000
+vn 5.347003 -0.730677 2.243891
+v 0.939495 -0.137963 0.353373 0.752941 0.752941 0.752941
+vn 3.090607 -2.164615 4.376353
+v 0.564054 -0.405378 0.749705 0.752941 0.752941 0.752941
+vn 4.139648 -3.083573 2.833979
+v 0.686938 -0.535204 0.519371 0.752941 0.752941 0.752941
+vn 5.547247 -1.363092 0.328926
+v 0.988626 -0.269261 0.045694 0.752941 0.752941 0.752941
+vn 1.196453 -3.595379 4.241168
+v 0.225855 -0.666498 0.743600 0.752941 0.752941 0.752941
+vn 3.440788 -4.400445 1.622125
+v 0.605470 -0.764079 0.286901 0.752941 0.752941 0.752941
+vn 1.483118 -5.448192 -0.612228
+v 0.268689 -0.989603 -0.074576 0.752941 0.752941 0.752941
+vn -5.131380 -1.139949 2.118813
+v -0.938366 -0.205491 0.365196 0.000000 1.000000 0.000000
+vn -0.148252 -0.090956 5.329934
+v -0.029807 -0.012393 1.044330 0.752941 0.752941 0.752941
+vn -2.941041 -1.873027 4.366208
+v -0.544021 -0.347070 0.802954 0.752941 0.752941 0.752941
+vn -4.510469 -3.611316 0.273923
+v -0.799559 -0.630144 0.056333 0.752941 0.752941 0.752941
+vn -1.222675 -3.813472 4.059360
+v -0.223449 -0.686099 0.721907 0.752941 0.752941 0.752941
+vn -3.177381 -4.314878 2.055289
+v -0.568591 -0.771892 0.343297 0.752941 0.752941 0.752941
+vn 0.555067 -5.357186 2.097567
+v 0.103775 -0.959491 0.337408 0.752941 0.752941 0.752941
+vn -1.424179 -5.441336 0.261796
+v -0.242174 -0.999619 0.061062 0.752941 0.752941 0.752941
+vn 0.603496 0.876270 -5.566095
+v 0.094800 0.183733 -1.001620 0.752941 0.752941 0.752941
+vn 3.764494 0.845602 -4.219594
+v 0.688569 0.139508 -0.738187 0.752941 0.752941 0.752941
+vn 1.567199 2.909075 -4.657190
+v 0.270132 0.525142 -0.837286 0.752941 0.752941 0.752941
+vn 3.565448 3.772605 -2.549637
+v 0.615553 0.702721 -0.424646 0.752941 0.752941 0.752941
+vn 5.162828 1.763767 -1.614322
+v 0.913888 0.321159 -0.322155 0.752941 0.752941 0.752941
+vn 1.324314 5.031288 -2.331577
+v 0.253180 0.897558 -0.437313 0.752941 0.752941 0.752941
+vn 3.629731 4.384550 -0.093890
+v 0.650335 0.795689 -0.024263 0.752941 0.752941 0.752941
+vn -4.043519 0.309797 -4.155685
+v -0.705153 0.056764 -0.729210 0.000000 1.000000 0.000000
+vn -2.307409 1.462065 -5.138504
+v -0.415375 0.253185 -0.890210 0.752941 0.752941 0.752941
+vn -3.353315 3.567168 -2.854749
+v -0.600441 0.642788 -0.533812 0.000000 1.000000 0.000000
+vn -5.130730 1.780599 -1.582083
+v -0.919162 0.323653 -0.317881 0.000000 1.000000 0.000000
+vn -1.144898 3.843270 -4.171122
+v -0.223449 0.686099 -0.721907 0.752941 0.752941 0.752941
+vn -0.991535 5.441091 -1.454840
+v -0.143287 0.963273 -0.301638 0.000000 1.000000 0.000000
+vn 5.131380 -1.139949 -2.118813
+v 0.938366 -0.205491 -0.365196 0.752941 0.752941 0.752941
+vn 2.875490 -1.819046 -4.391660
+v 0.544021 -0.347070 -0.802954 0.752941 0.752941 0.752941
+vn 4.525778 -3.601130 -0.274730
+v 0.799559 -0.630144 -0.056333 0.752941 0.752941 0.752941
+vn 1.250127 -3.837570 -4.049940
+v 0.223449 -0.686099 -0.721907 0.752941 0.752941 0.752941
+vn 3.198936 -4.303796 -2.086038
+v 0.568591 -0.771892 -0.343297 0.752941 0.752941 0.752941
+vn -0.857578 -1.469198 -5.294847
+v -0.187308 -0.267522 -0.979869 0.752941 0.752941 0.752941
+vn -3.158328 -2.247927 -4.280213
+v -0.564054 -0.405378 -0.749705 0.752941 0.752941 0.752941
+vn -5.397189 -0.800014 -2.176419
+v -0.939495 -0.137963 -0.353373 0.000000 1.000000 0.000000
+vn -4.099356 -3.088730 -2.858676
+v -0.686938 -0.535204 -0.519371 0.752941 0.752941 0.752941
+vn -5.547246 -1.363092 -0.328926
+v -0.988626 -0.269261 -0.045694 0.000000 1.000000 0.000000
+vn -3.407609 -4.424288 -1.578537
+v -0.605470 -0.764079 -0.286901 0.752941 0.752941 0.752941
+vn -0.843019 -4.558556 -3.151353
+v -0.153857 -0.826487 -0.592781 0.752941 0.752941 0.752941
+# 52 vertices, 0 vertices normals
+f 16//16 2//2 13//13
+f 16//16 32//32 2//2
+f 2//2 3//3 1//1
+f 1//1 3//3 21//21
+f 21//21 3//3 8//8
+f 2//2 5//5 3//3
+f 2//2 34//34 5//5
+f 3//3 5//5 4//4
+f 3//3 4//4 8//8
+f 5//5 6//6 4//4
+f 4//4 6//6 11//11
+f 5//5 34//34 6//6
+f 34//34 33//33 6//6
+f 22//22 21//21 7//7
+f 7//7 10//10 20//20
+f 20//20 10//10 50//50
+f 7//7 21//21 8//8
+f 7//7 8//8 9//9
+f 7//7 9//9 10//10
+f 10//10 9//9 12//12
+f 10//10 12//12 38//38
+f 8//8 11//11 9//9
+f 9//9 11//11 12//12
+f 8//8 4//4 11//11
+f 1//1 13//13 2//2
+f 1//1 14//14 13//13
+f 1//1 21//21 14//14
+f 14//14 15//15 13//13
+f 13//13 15//15 16//16
+f 21//21 17//17 14//14
+f 15//15 18//18 16//16
+f 16//16 18//18 43//43
+f 14//14 17//17 15//15
+f 15//15 17//17 18//18
+f 17//17 24//24 26//26
+f 17//17 26//26 18//18
+f 18//18 19//19 43//43
+f 18//18 26//26 19//19
+f 7//7 20//20 22//22
+f 50//50 23//23 20//20
+f 22//22 24//24 21//21
+f 21//21 24//24 17//17
+f 23//23 25//25 20//20
+f 20//20 25//25 22//22
+f 22//22 25//25 24//24
+f 23//23 27//27 25//25
+f 25//25 27//27 24//24
+f 24//24 27//27 26//26
+f 42//42 28//28 29//29
+f 29//29 32//32 41//41
+f 41//41 32//32 16//16
+f 29//29 28//28 30//30
+f 29//29 30//30 31//31
+f 29//29 31//31 32//32
+f 32//32 31//31 34//34
+f 32//32 34//34 2//2
+f 30//30 33//33 31//31
+f 30//30 39//39 33//33
+f 31//31 33//33 34//34
+f 33//33 40//40 6//6
+f 50//50 38//38 48//48
+f 36//36 28//28 46//46
+f 50//50 10//10 38//38
+f 38//38 37//37 35//35
+f 35//35 37//37 36//36
+f 36//36 37//37 39//39
+f 36//36 39//39 28//28
+f 28//28 39//39 30//30
+f 38//38 12//12 37//37
+f 37//37 12//12 40//40
+f 37//37 40//40 39//39
+f 39//39 40//40 33//33
+f 12//12 11//11 40//40
+f 40//40 11//11 6//6
+f 29//29 41//41 42//42
+f 42//42 46//46 28//28
+f 16//16 43//43 41//41
+f 42//42 44//44 46//46
+f 43//43 45//45 41//41
+f 41//41 45//45 42//42
+f 42//42 45//45 44//44
+f 44//44 52//52 46//46
+f 43//43 19//19 45//45
+f 45//45 19//19 44//44
+f 44//44 19//19 52//52
+f 19//19 26//26 27//27
+f 36//36 46//46 35//35
+f 35//35 48//48 38//38
+f 35//35 46//46 47//47
+f 35//35 47//47 48//48
+f 47//47 49//49 48//48
+f 48//48 49//49 50//50
+f 49//49 51//51 50//50
+f 50//50 51//51 23//23
+f 46//46 52//52 47//47
+f 47//47 52//52 49//49
+f 49//49 52//52 51//51
+f 51//51 27//27 23//23
+f 52//52 27//27 51//51
+f 52//52 19//19 27//27
+# 100 faces, 0 coords texture
+# End of File
-- 
GitLab