diff --git a/.gitignore b/.gitignore
index 9b148120db3b0f916bc876d5d4de33e4da0c2a21..76b5902a38a00ce5a9e32719f35a5cca3f0e333b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,6 +10,7 @@ qrc_*
 
 # CLion indexing
 *.uuid
+.fleet
 
 
 # Generated files
@@ -32,11 +33,13 @@ qrc_*
 # Visual Studio Code
 /.vscode
 
+# Zed
+/.cache*
+
 # CLion
 *.idea
 *.clion*
 
-
 # QtCreator
 CMakeLists.txt.user.*
 
diff --git a/apps/benchmarks/NonUniformGridCPU/CMakeLists.txt b/apps/benchmarks/NonUniformGridCPU/CMakeLists.txt
index 2b37ed6fb19797229ae5507f4d26f3f031491a87..1a58f36da0f48627a2e7f274a712fecad5a93437 100644
--- a/apps/benchmarks/NonUniformGridCPU/CMakeLists.txt
+++ b/apps/benchmarks/NonUniformGridCPU/CMakeLists.txt
@@ -11,11 +11,6 @@ waLBerla_generate_target_from_python(NAME NonUniformGridCPUGenerated
         NonUniformGridCPUBoundaryCollection.h
         NonUniformGridCPUInfoHeader.h)
 
-waLBerla_add_executable( NAME NonUniformGridGenerator
-                         FILES NonUniformGridGenerator.cpp LdcSetup.h
-                         DEPENDS blockforest core field python_coupling )
-
-
 waLBerla_add_executable( NAME NonUniformGridCPU
-                         FILES NonUniformGridCPU.cpp LdcSetup.h
+                         FILES NonUniformGridCPU.cpp LdcSetup.h GridGeneration.h
                          DEPENDS blockforest boundary core domain_decomposition field geometry lbm_generated python_coupling timeloop vtk NonUniformGridCPUGenerated )
diff --git a/apps/benchmarks/NonUniformGridCPU/GridGeneration.h b/apps/benchmarks/NonUniformGridCPU/GridGeneration.h
new file mode 100644
index 0000000000000000000000000000000000000000..ec7069bc94a59515ce2c24491b93e50e4cd3ec0d
--- /dev/null
+++ b/apps/benchmarks/NonUniformGridCPU/GridGeneration.h
@@ -0,0 +1,144 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file GridGeneration.h
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+#pragma once
+
+#include "blockforest/Initialization.h"
+#include "blockforest/SetupBlock.h"
+#include "blockforest/SetupBlockForest.h"
+#include "blockforest/loadbalancing/StaticCurve.h"
+
+#include "core/Environment.h"
+#include "core/logging/Initialization.h"
+#include "core/timing/RemainingTimeLogger.h"
+#include "core/timing/TimingPool.h"
+
+#include <string>
+
+#include "LdcSetup.h"
+#include "NonUniformGridCPUInfoHeader.h"
+
+using StorageSpecification_T = lbm::NonUniformGridCPUStorageSpecification;
+using Stencil_T              = StorageSpecification_T::Stencil;
+
+using namespace walberla;
+
+//! Initialises and load-balances \p setupBfs, using the refinement selection provided by `LDC`.
+//! Single-process runs also save the forest to "<blockForestFilestem>.bfs" and optionally write VTK output and statistics.
+//! \param domainSetup       config block read for domainSize, cellsPerBlock, rootBlocks and periodic
+//! \param blockForestSetup  config block read for refinementDepth, numProcesses, blockForestFilestem, writeVtk, outputStatistics
+//! \param useMPIManager     balance for the current number of MPI processes; "numProcesses" is then not read from the config
+//! Declared `inline`: a non-inline function defined in a header violates the ODR once two translation units include it.
+inline void createSetupBlockForest(SetupBlockForest& setupBfs,
+                                   const Config::BlockHandle& domainSetup, const Config::BlockHandle& blockForestSetup,
+                                   const bool useMPIManager=false)
+{
+   WALBERLA_LOG_INFO_ON_ROOT("Generating SetupBlockForest...")
+
+   const Vector3< real_t > domainSize    = domainSetup.getParameter< Vector3< real_t > >("domainSize");
+   const Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock");
+   const Vector3< uint_t > rootBlocks    = domainSetup.getParameter< Vector3< uint_t > >("rootBlocks");
+   const Vector3< bool > periodic        = domainSetup.getParameter< Vector3< bool > >("periodic");
+
+   const uint_t refinementDepth = blockForestSetup.getParameter< uint_t >("refinementDepth", uint_c(1));
+   const std::string blockForestFilestem = blockForestSetup.getParameter< std::string > ("blockForestFilestem", "blockforest");
+   const bool writeVtk = blockForestSetup.getParameter< bool >("writeVtk", false);
+   const bool outputStatistics = blockForestSetup.getParameter< bool >("outputStatistics", false);
+   // "numProcesses" only has to be present in the config when the process count is not taken from MPI.
+   uint_t numProcesses = uint_c(mpi::MPIManager::instance()->numProcesses());
+   if(!useMPIManager)
+      numProcesses = blockForestSetup.getParameter< uint_t >("numProcesses");
+
+   const LDC ldc(refinementDepth);
+   auto refSelection = ldc.refinementSelector();
+   setupBfs.addRefinementSelectionFunction(std::function<void(SetupBlockForest &)>(refSelection));
+   const AABB domain(real_t(0.0), real_t(0.0), real_t(0.0), domainSize[0], domainSize[1], domainSize[2]);
+   setupBfs.addWorkloadMemorySUIDAssignmentFunction(blockforest::uniformWorkloadAndMemoryAssignment);
+   setupBfs.init(domain, rootBlocks[0], rootBlocks[1], rootBlocks[2], periodic[0], periodic[1], periodic[2]);
+   setupBfs.balanceLoad(blockforest::StaticLevelwiseCurveBalanceWeighted(), numProcesses);
+
+   // File output and statistics below are restricted to single-process runs.
+   if(mpi::MPIManager::instance()->numProcesses() > 1)
+      return;
+
+   setupBfs.saveToFile((blockForestFilestem + ".bfs").c_str());
+   if(writeVtk)
+      setupBfs.writeVTKOutput(blockForestFilestem);
+
+   if(outputStatistics){
+      WALBERLA_LOG_INFO_ON_ROOT("===========================  BLOCK FOREST STATISTICS ============================");
+      WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << setupBfs.getNumberOfBlocks())
+      for (uint_t level = 0; level <= refinementDepth; level++)
+      {
+         WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << setupBfs.getNumberOfBlocks(level))
+      }
+
+      const real_t avgBlocksPerProc = real_c(setupBfs.getNumberOfBlocks()) / real_c(setupBfs.getNumberOfProcesses());
+      WALBERLA_LOG_INFO_ON_ROOT("Average blocks per process: " << avgBlocksPerProc);
+
+      const uint_t cellsInBlock = cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2];
+      const uint_t totalNumberCells = setupBfs.getNumberOfBlocks() * cellsInBlock;
+      const real_t averageCellsPerProcess = avgBlocksPerProc * real_c(cellsInBlock);
+
+      const uint_t PDFsPerCell = StorageSpecification_T::inplace ? Stencil_T::Q : 2 * Stencil_T::Q;
+      const uint_t valuesPerCell = (PDFsPerCell + VelocityField_T::F_SIZE + ScalarField_T::F_SIZE);
+      const uint_t sizePerValue = sizeof(StorageSpecification_T::value_type);
+      const double expectedMemory = double_c(totalNumberCells * valuesPerCell * sizePerValue) * 1e-9;
+      const double expectedMemoryPerProcess = double_c(averageCellsPerProcess * valuesPerCell * sizePerValue) * 1e-9;
+
+      WALBERLA_LOG_INFO_ON_ROOT( "Total number of cells will be " << totalNumberCells << " fluid cells (in total on all levels)")
+      WALBERLA_LOG_INFO_ON_ROOT( "Expected total memory demand will be " << expectedMemory << " GB")
+      WALBERLA_LOG_INFO_ON_ROOT( "Average memory demand per process will be " << expectedMemoryPerProcess << " GB")
+
+      WALBERLA_LOG_INFO_ON_ROOT("=================================================================================");
+   }
+}
+
+//! Creates the BlockForest for the calling process and stores it in \p bfs.
+//! Multi-process runs load "<blockForestFilestem>.bfs" if that file can be opened and otherwise build the
+//! SetupBlockForest on the fly (with a warning); single-process runs always build it via createSetupBlockForest.
+//! \param domainSetup       forwarded to createSetupBlockForest
+//! \param blockForestSetup  read for "blockForestFilestem" and forwarded to createSetupBlockForest
+//! Declared `inline`: a non-inline function defined in a header violates the ODR once two translation units include it.
+inline void createBlockForest(shared_ptr< BlockForest >& bfs,
+                              const Config::BlockHandle& domainSetup, const Config::BlockHandle& blockForestSetup)
+{
+   const bool multipleProcesses = mpi::MPIManager::instance()->numProcesses() > 1;
+   const uint_t rank            = uint_c(mpi::MPIManager::instance()->worldRank());
+
+   if (multipleProcesses)
+   {
+      const std::string blockForestFilestem =
+         blockForestSetup.getParameter< std::string >("blockForestFilestem", "blockforest");
+      const std::string setupBlockForestFilepath = blockForestFilestem + ".bfs";
+
+      // Prefer a forest that was written to file beforehand.
+      if (std::ifstream(setupBlockForestFilepath.c_str()).good())
+      {
+         bfs = std::make_shared< BlockForest >(rank, setupBlockForestFilepath.c_str(), false);
+         return;
+      }
+      WALBERLA_LOG_WARNING_ON_ROOT("Blockforest was not created beforehand and thus needs to be created on the fly. For large simulation runs this can be a severe problem!")
+   }
+
+   // Reached by single-process runs and by multi-process runs without a stored forest; only the latter pass `true`.
+   SetupBlockForest setupBfs;
+   createSetupBlockForest(setupBfs, domainSetup, blockForestSetup, multipleProcesses);
+   bfs = std::make_shared< BlockForest >(rank, setupBfs);
+}
\ No newline at end of file
diff --git a/apps/benchmarks/NonUniformGridCPU/LdcSetup.h b/apps/benchmarks/NonUniformGridCPU/LdcSetup.h
index 070656cb23582a4da83f954c3e108aa70e69b315..6fe5a508965e61e6bb22cc9cef82a4520096b4ee 100644
--- a/apps/benchmarks/NonUniformGridCPU/LdcSetup.h
+++ b/apps/benchmarks/NonUniformGridCPU/LdcSetup.h
@@ -48,14 +48,8 @@ class LDCRefinement
    {
       const AABB & domain = forest.getDomain();
 
-      const real_t xSize = ( domain.xSize() / real_t(12) ) * real_c( 0.99 );
-      const real_t ySize = ( domain.ySize() / real_t(12) ) * real_c( 0.99 );
-
-      const AABB leftCorner( domain.xMin(), domain.yMin(), domain.zMin(),
-                            domain.xMin() + xSize, domain.yMin() + ySize, domain.zMax() );
-
-      const AABB rightCorner( domain.xMax() - xSize, domain.yMin(), domain.zMin(),
-                             domain.xMax(), domain.yMin() + ySize, domain.zMax() );
+      const AABB leftCorner( 0, domain.yMax() -1, 0, 1, domain.yMax() , domain.zMax() );
+      const AABB rightCorner( domain.xMax() - 1, domain.yMax() -1, 0, domain.xMax(), domain.yMax() , domain.zMax() );
 
       for(auto & block : forest)
       {
diff --git a/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.cpp b/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.cpp
index 34fde13e360e3190c3ec811b0f810432f47adf00..84b782919b0bd0c6b45374d5e69bd9ce4f5febb0 100644
--- a/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.cpp
+++ b/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.cpp
@@ -38,6 +38,7 @@
 
 #include <cmath>
 
+#include "GridGeneration.h"
 #include "LdcSetup.h"
 #include "NonUniformGridCPUInfoHeader.h"
 #include "lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h"
@@ -77,23 +78,25 @@ int main(int argc, char** argv)
 
       auto config = *cfg;
       logging::configureLogging(config);
-
+      auto domainSetup      = config->getOneBlock("DomainSetup");
       auto blockForestSetup = config->getOneBlock("SetupBlockForest");
+      const bool writeSetupForestAndReturn = blockForestSetup.getParameter< bool >("writeSetupForestAndReturn", true);
+
       const std::string blockForestFilestem =
          blockForestSetup.getParameter< std::string >("blockForestFilestem", "blockforest");
       const uint_t refinementDepth = blockForestSetup.getParameter< uint_t >("refinementDepth", uint_c(1));
 
-      auto domainSetup                = config->getOneBlock("DomainSetup");
       Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock");
 
-      // Load structured block forest from file
-      std::ostringstream oss;
-      oss << blockForestFilestem << ".bfs";
-      const std::string setupBlockForestFilepath = oss.str();
+      shared_ptr< BlockForest > bfs;
+      createBlockForest(bfs, domainSetup, blockForestSetup);
+
+      if (writeSetupForestAndReturn && mpi::MPIManager::instance()->numProcesses() == 1)
+      {
+         WALBERLA_LOG_INFO_ON_ROOT("BlockForest has been created and writen to file. Returning program")
+         return EXIT_SUCCESS;
+      }
 
-      WALBERLA_LOG_INFO_ON_ROOT("Creating structured block forest...")
-      auto bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()),
-                                                 setupBlockForestFilepath.c_str(), false);
       auto blocks =
          std::make_shared< StructuredBlockForest >(bfs, cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2]);
       blocks->createCellBoundingBoxes();
@@ -173,6 +176,8 @@ int main(int argc, char** argv)
       const uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0);
       const bool useVTKAMRWriter = parameters.getParameter< bool >("useVTKAMRWriter", false);
       const bool oneFilePerProcess = parameters.getParameter< bool >("oneFilePerProcess", false);
+
+      auto finalDomain = blocks->getDomain();
       if (vtkWriteFrequency > 0)
       {
          auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out",
@@ -180,6 +185,12 @@ int main(int argc, char** argv)
          auto velWriter = make_shared< field::VTKWriter< VelocityField_T, float32 > >(velFieldID, "vel");
          vtkOutput->addCellDataWriter(velWriter);
 
+         if (parameters.getParameter< bool >("writeOnlySlice", true)){
+            const AABB sliceXY(finalDomain.xMin(), finalDomain.yMin(), finalDomain.center()[2] - blocks->dz(refinementDepth),
+                               finalDomain.xMax(), finalDomain.yMax(), finalDomain.center()[2] + blocks->dz(refinementDepth));
+            vtkOutput->addCellInclusionFilter(vtk::AABBCellFilter(sliceXY));
+         }
+
          vtkOutput->addBeforeFunction([&]() {
             for (auto& block : *blocks)
                sweepCollection.calculateMacroscopicParameters(&block);
@@ -236,6 +247,8 @@ int main(int argc, char** argv)
                pythonCallbackResults.data().exposeValue("numProcesses", performance.processes());
                pythonCallbackResults.data().exposeValue("numThreads", performance.threads());
                pythonCallbackResults.data().exposeValue("numCores", performance.cores());
+               pythonCallbackResults.data().exposeValue("numberOfCells", performance.numberOfCells());
+               pythonCallbackResults.data().exposeValue("numberOfFluidCells", performance.numberOfFluidCells());
                pythonCallbackResults.data().exposeValue("mlups", performance.mlups(timesteps, time));
                pythonCallbackResults.data().exposeValue("mlupsPerCore", performance.mlupsPerCore(timesteps, time));
                pythonCallbackResults.data().exposeValue("mlupsPerProcess",
diff --git a/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.py b/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.py
index 25e9342058f24b7ae6008ee66a42beb8dd230a5a..368fd56936e594a6c3c6f172c3177f434329e2e7 100644
--- a/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.py
+++ b/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.py
@@ -23,17 +23,23 @@ const bool infoCsePdfs = {cse_pdfs};
 
 with CodeGeneration() as ctx:
     field_type = "float64" if ctx.double_accuracy else "float32"
+    cpu_vec = {"instruction_set": None}
 
-    streaming_pattern = 'aa'
+    streaming_pattern = 'esopull'
     timesteps = get_timesteps(streaming_pattern)
     stencil = LBStencil(Stencil.D3Q19)
+    method_enum = Method.CUMULANT
+
+    fourth_order_correction = 0.01 if method_enum == Method.CUMULANT and stencil.Q == 27 else False
+    collision_setup = "cumulant-K17" if fourth_order_correction else method_enum.name.lower()
 
     assert stencil.D == 3, "This application supports only three-dimensional stencils"
     pdfs, pdfs_tmp = ps.fields(f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {field_type}[3D]", layout='fzyx')
     density_field, velocity_field = ps.fields(f"density, velocity(3) : {field_type}[3D]", layout='fzyx')
     macroscopic_fields = {'density': density_field, 'velocity': velocity_field}
 
-    lbm_config = LBMConfig(stencil=stencil, method=Method.SRT, relaxation_rate=omega, compressible=True,
+    lbm_config = LBMConfig(stencil=stencil, method=method_enum, relaxation_rate=omega, compressible=True,
+                           fourth_order_correction=fourth_order_correction,
                            streaming_pattern=streaming_pattern)
     lbm_opt = LBMOptimisation(cse_global=False, field_layout="fzyx")
 
@@ -50,12 +56,12 @@ with CodeGeneration() as ctx:
                          lbm_config=lbm_config, lbm_optimisation=lbm_opt,
                          nonuniform=True, boundaries=[no_slip, ubb],
                          macroscopic_fields=macroscopic_fields,
-                         target=ps.Target.CPU)
+                         target=ps.Target.CPU, cpu_vectorize_info=cpu_vec,)
 
     infoHeaderParams = {
         'stencil': stencil.name.lower(),
         'streaming_pattern': streaming_pattern,
-        'collision_setup': lbm_config.method.name.lower(),
+        'collision_setup': collision_setup,
         'cse_global': int(lbm_opt.cse_global),
         'cse_pdfs': int(lbm_opt.cse_pdfs),
     }
diff --git a/apps/benchmarks/NonUniformGridCPU/NonUniformGridGenerator.cpp b/apps/benchmarks/NonUniformGridCPU/NonUniformGridGenerator.cpp
deleted file mode 100644
index d7eab3046a8969f7ccc09bc7941315d924412822..0000000000000000000000000000000000000000
--- a/apps/benchmarks/NonUniformGridCPU/NonUniformGridGenerator.cpp
+++ /dev/null
@@ -1,98 +0,0 @@
-//======================================================================================================================
-//
-//  This file is part of waLBerla. waLBerla is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file NonUniformGridGenerator.cpp
-//! \author Frederik Hennig <frederik.hennig@fau.de>
-//
-//======================================================================================================================
-
-#include "blockforest/Initialization.h"
-#include "blockforest/SetupBlock.h"
-#include "blockforest/SetupBlockForest.h"
-#include "blockforest/loadbalancing/StaticCurve.h"
-
-#include "core/all.h"
-
-#include "python_coupling/CreateConfig.h"
-
-#include <string>
-
-#include "LdcSetup.h"
-
-using namespace walberla;
-
-
-int main(int argc, char ** argv){
-   const mpi::Environment env(argc, argv);
-   mpi::MPIManager::instance()->useWorldComm();
-
-   if(mpi::MPIManager::instance()->numProcesses() > 1){
-      WALBERLA_ABORT("Commandment: Thou shalt not run thy grid generator with more than one process.");
-   }
-
-   for (auto cfg = python_coupling::configBegin(argc, argv); cfg != python_coupling::configEnd(); ++cfg)
-   {
-      auto config = *cfg;
-      auto domainSetup = config->getOneBlock("DomainSetup");
-
-      Vector3<real_t> domainSize = domainSetup.getParameter<Vector3<real_t> >("domainSize");
-      Vector3<uint_t> rootBlocks = domainSetup.getParameter<Vector3<uint_t> >("rootBlocks");
-      Vector3<bool> periodic = domainSetup.getParameter<Vector3<bool> >("periodic");
-
-      auto blockForestSetup = config->getOneBlock("SetupBlockForest");
-      const uint_t refinementDepth = blockForestSetup.getParameter< uint_t >("refinementDepth", uint_c(1));
-      const uint_t numProcesses = blockForestSetup.getParameter< uint_t >( "numProcesses");
-      const std::string blockForestFilestem = blockForestSetup.getParameter< std::string > ("blockForestFilestem", "blockforest");
-      const bool writeVtk = blockForestSetup.getParameter< bool >("writeVtk", false);
-      const bool outputStatistics = blockForestSetup.getParameter< bool >("outputStatistics", false);
-
-      const LDC ldc(refinementDepth);
-      SetupBlockForest setupBfs;
-
-      auto refSelection = ldc.refinementSelector();
-      setupBfs.addRefinementSelectionFunction(std::function<void(SetupBlockForest &)>(refSelection));
-      const AABB domain(real_t(0.0), real_t(0.0), real_t(0.0), domainSize[0], domainSize[1], domainSize[2]);
-      setupBfs.addWorkloadMemorySUIDAssignmentFunction(blockforest::uniformWorkloadAndMemoryAssignment);
-      setupBfs.init(domain, rootBlocks[0], rootBlocks[1], rootBlocks[2], periodic[0], periodic[1], periodic[2]);
-      setupBfs.balanceLoad(blockforest::StaticLevelwiseCurveBalanceWeighted(), numProcesses);
-
-      {
-         std::ostringstream oss;
-         oss << blockForestFilestem << ".bfs";
-         setupBfs.saveToFile(oss.str().c_str());
-      }
-
-      if(writeVtk){
-         setupBfs.writeVTKOutput(blockForestFilestem);
-      }
-
-      if(outputStatistics){
-         WALBERLA_LOG_INFO_ON_ROOT("===========================  BLOCK FOREST STATISTICS ============================");
-         WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << setupBfs.getNumberOfBlocks())
-         for (uint_t level = 0; level <= refinementDepth; level++)
-         {
-            const uint_t numberOfBlocks = setupBfs.getNumberOfBlocks(level);
-            WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << numberOfBlocks)
-         }
-
-         const uint_t avgBlocksPerProc = setupBfs.getNumberOfBlocks() / setupBfs.getNumberOfProcesses();
-         WALBERLA_LOG_INFO_ON_ROOT("Average blocks per process: " << avgBlocksPerProc);
-         WALBERLA_LOG_INFO_ON_ROOT("=================================================================================");
-      }
-
-
-      WALBERLA_LOG_INFO_ON_ROOT("Ending program")
-   }
-}
diff --git a/apps/benchmarks/NonUniformGridCPU/simulation_setup/benchmark_configs.py b/apps/benchmarks/NonUniformGridCPU/simulation_setup/benchmark_configs.py
index 51a0220b02b9316053f1e2f175d8e2d1aaf017b2..108b431f74ce60ddda0a69cb9ea61d075d8291ce 100644
--- a/apps/benchmarks/NonUniformGridCPU/simulation_setup/benchmark_configs.py
+++ b/apps/benchmarks/NonUniformGridCPU/simulation_setup/benchmark_configs.py
@@ -1,10 +1,25 @@
 import waLBerla as wlb
+from waLBerla.tools.config import block_decomposition
 from waLBerla.tools.sqlitedb import sequenceValuesToScalars, checkAndUpdateSchema, storeSingle
 import sqlite3
 import os
 import sys
 
+try:
+    import machinestate as ms
+except ImportError:
+    ms = None
+
 DB_FILE = os.environ.get('DB_FILE', "cpu_benchmark.sqlite3")
+BENCHMARK = int(os.environ.get('BENCHMARK', 0))
+
+WeakX = int(os.environ.get('WeakX', 128))
+WeakY = int(os.environ.get('WeakY', 128))
+WeakZ = int(os.environ.get('WeakZ', 128))
+
+StrongX = int(os.environ.get('StrongX', 128))
+StrongY = int(os.environ.get('StrongY', 128))
+StrongZ = int(os.environ.get('StrongZ', 128))
 
 
 class Scenario:
@@ -18,7 +33,8 @@ class Scenario:
                  vtk_write_frequency=0,
                  logger_frequency=0,
                  blockforest_filestem="blockforest",
-                 write_setup_vtk=False):
+                 write_setup_vtk=True,
+                 db_file_name=None):
 
         self.domain_size = domain_size
         self.root_blocks = root_blocks
@@ -34,6 +50,8 @@ class Scenario:
         self.vtk_write_frequency = vtk_write_frequency
         self.logger_frequency = logger_frequency
 
+        self.db_file_name = DB_FILE if db_file_name is None else db_file_name
+
         self.config_dict = self.config(print_dict=False)
 
     @wlb.member_callback
@@ -51,7 +69,8 @@ class Scenario:
                 'numProcesses': self.num_processes,
                 'blockForestFilestem': self.bfs_filestem,
                 'writeVtk': self.write_setup_vtk,
-                'outputStatistics': False
+                'outputStatistics': True,
+                'writeSetupForestAndReturn': True,
             },
             'Parameters': {
                 'omega': 1.95,
@@ -59,14 +78,15 @@ class Scenario:
                 'remainingTimeLoggerFrequency': self.logger_frequency,
                 'vtkWriteFrequency': self.vtk_write_frequency,
                 'useVTKAMRWriter': True,
-                'oneFilePerProcess': False
+                'oneFilePerProcess': False,
+                'writeOnlySlice': False
             },
             'Logging': {
                 'logLevel': "info",
             }
         }
 
-        if (print_dict):
+        if print_dict:
             wlb.log_info_on_root("Scenario:\n" + pformat(config_dict))
 
         return config_dict
@@ -82,6 +102,15 @@ class Scenario:
         data['compile_flags'] = wlb.build_info.compiler_flags
         data['walberla_version'] = wlb.build_info.version
         data['build_machine'] = wlb.build_info.build_machine
+
+        if ms:
+            state = ms.MachineState(extended=False, anonymous=True)
+            state.generate()                        # generate subclasses
+            state.update()                          # read information
+            data["MachineState"] = str(state.get())
+        else:
+            print("MachineState module is not available. MachineState was not saved")
+
         sequenceValuesToScalars(data)
 
         result = data
@@ -92,52 +121,109 @@ class Scenario:
         table_name = table_name.replace("-", "_")
         for num_try in range(num_tries):
             try:
-                checkAndUpdateSchema(result, table_name, DB_FILE)
-                storeSingle(result, table_name, DB_FILE)
+                checkAndUpdateSchema(result, table_name, self.db_file_name)
+                storeSingle(result, table_name, self.db_file_name)
                 break
             except sqlite3.OperationalError as e:
                 wlb.log_warning(f"Sqlite DB writing failed: try {num_try + 1}/{num_tries}  {str(e)}")
 
 
-def validation_run():
-    """Run with full periodic shear flow or boundary scenario (ldc) to check if the code works"""
-    wlb.log_info_on_root("Validation run")
+def weak_scaling_ldc(num_proc, uniform=False):
+    """Add the weak scaling scenario; parallel runs override num_proc, non-uniform runs need a multiple of 16."""
+    wlb.log_info_on_root("Running weak scaling benchmark...")
-    domain_size = (96, 96, 96)
-    cells_per_block = (32, 32, 32)
+    # This benchmark must run from 16 processes onwards
+    if wlb.mpi.numProcesses() > 1:
+        num_proc = wlb.mpi.numProcesses()
+
+    if uniform:
+        factor = 3 * num_proc
+        name = "uniform"
+    else:
+        if num_proc % 16 != 0:
+            raise RuntimeError("Number of processes must be dividable by 16")
+        factor = int(num_proc // 16)
+        name = "nonuniform"
+
+    cells_per_block = (WeakX, WeakY, WeakZ)
+    domain_size = (cells_per_block[0] * 3, cells_per_block[1] * 3, cells_per_block[2] * factor)
+
     root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)])
 
     scenarios = wlb.ScenarioManager()
-    scenario = Scenario(domain_size=domain_size,
+    scenario = Scenario(blockforest_filestem=f"blockforest_{name}_{num_proc}",
+                        domain_size=domain_size,
                         root_blocks=root_blocks,
-                        num_processes=1,
-                        refinement_depth=1,
+                        num_processes=num_proc,
                         cells_per_block=cells_per_block,
-                        timesteps=201,
-                        vtk_write_frequency=100,
-                        logger_frequency=5,
-                        write_setup_vtk=True)
+                        refinement_depth=0 if uniform else 3,
+                        timesteps=10,
+                        db_file_name=f"weakScalingCPU{name}LDC.sqlite3")
+    scenarios.add(scenario)
+
+
+def strong_scaling_ldc(num_proc, uniform=False):
+    """Add the strong scaling scenario; parallel runs override num_proc, which must be a multiple of 64."""
+    wlb.log_info_on_root("Running strong scaling benchmark...")
+
+    # This benchmark must run from 64 processes onwards
+    if wlb.mpi.numProcesses() > 1:
+        num_proc = wlb.mpi.numProcesses()
+    if num_proc % 64 != 0:
+        raise RuntimeError("Number of processes must be dividable by 64")
+
+    cells_per_block = (StrongX, StrongY, StrongZ)
+
+    if uniform:
+        domain_size = (cells_per_block[0] * 2, cells_per_block[1] * 2, cells_per_block[2] * 16)
+        name = "uniform"
+    else:
+        factor = num_proc // 64
+        blocks64 = block_decomposition(factor)
+        cells_per_block = tuple(c // b for c, b in zip(cells_per_block, reversed(blocks64)))
+        domain_size = (cells_per_block[0] * 3, cells_per_block[1] * 3, cells_per_block[2] * factor)
+        name = "nonuniform"
+
+    root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)])
+
+    scenarios = wlb.ScenarioManager()
+    scenario = Scenario(blockforest_filestem=f"blockforest_{name}_{num_proc}",
+                        domain_size=domain_size,
+                        root_blocks=root_blocks,
+                        num_processes=num_proc,
+                        cells_per_block=cells_per_block,
+                        refinement_depth=0 if uniform else 3,
+                        timesteps=10,
+                        db_file_name=f"strongScalingCPU{name}LDC.sqlite3")
     scenarios.add(scenario)
 
 
-def scaling():
-    wlb.log_info_on_root("Running scaling benchmark...")
+def validation_run():
+    """Run a small single-process scenario (ldc) with VTK output every 100 steps to check if the code works"""
+    wlb.log_info_on_root("Validation run")
 
-    numProc = wlb.mpi.numProcesses()
+    domain_size = (96, 96, 32)
+    cells_per_block = (32, 32, 32)
 
-    domain_size = (256, 256, 128 * numProc)
-    cells_per_block = (64, 64, 64)
     root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)])
 
     scenarios = wlb.ScenarioManager()
     scenario = Scenario(domain_size=domain_size,
                         root_blocks=root_blocks,
+                        num_processes=1,
+                        refinement_depth=3,
                         cells_per_block=cells_per_block,
-                        refinement_depth=2,
-                        timesteps=10)
+                        timesteps=1001,
+                        vtk_write_frequency=100,
+                        logger_frequency=5,
+                        write_setup_vtk=True)
     scenarios.add(scenario)
 
 
-validation_run()
-# scaling()
+if BENCHMARK == 0:
+    validation_run()
+elif BENCHMARK == 1:
+    weak_scaling_ldc(1, False)
+elif BENCHMARK == 2:
+    strong_scaling_ldc(1, False)
+else:
+    print(f"Invalid benchmark case {BENCHMARK}")
diff --git a/apps/benchmarks/NonUniformGridGPU/CMakeLists.txt b/apps/benchmarks/NonUniformGridGPU/CMakeLists.txt
index f6b4e1ff3779f624c8fb9845425d9d6a86103ee9..c8b02785b104a80075d2e36a1647eceec8a467c3 100644
--- a/apps/benchmarks/NonUniformGridGPU/CMakeLists.txt
+++ b/apps/benchmarks/NonUniformGridGPU/CMakeLists.txt
@@ -11,5 +11,5 @@ waLBerla_generate_target_from_python(NAME NonUniformGridGPUGenerated
         NonUniformGridGPUBoundaryCollection.h
         NonUniformGridGPUInfoHeader.h)
 waLBerla_add_executable( NAME NonUniformGridGPU
-                         FILES NonUniformGridGPU.cpp LdcSetup.h
+                         FILES NonUniformGridGPU.cpp LdcSetup.h GridGeneration.h
                          DEPENDS blockforest boundary core gpu domain_decomposition field geometry lbm_generated python_coupling timeloop vtk NonUniformGridGPUGenerated )
\ No newline at end of file
diff --git a/apps/benchmarks/NonUniformGridGPU/GridGeneration.h b/apps/benchmarks/NonUniformGridGPU/GridGeneration.h
new file mode 100644
index 0000000000000000000000000000000000000000..5de0a45d386a65fc984d87ec40f9f21caa718628
--- /dev/null
+++ b/apps/benchmarks/NonUniformGridGPU/GridGeneration.h
@@ -0,0 +1,139 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file GridGeneration.h
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+#pragma once
+
+#include "blockforest/Initialization.h"
+#include "blockforest/SetupBlock.h"
+#include "blockforest/SetupBlockForest.h"
+#include "blockforest/loadbalancing/StaticCurve.h"
+
+#include "core/Environment.h"
+#include "core/logging/Initialization.h"
+#include "core/timing/RemainingTimeLogger.h"
+#include "core/timing/TimingPool.h"
+
+#include <string>
+
+#include "LdcSetup.h"
+#include "NonUniformGridGPUInfoHeader.h"
+
+using StorageSpecification_T = lbm::NonUniformGridGPUStorageSpecification; // value_type / inplace feed the memory estimate below
+using Stencil_T              = StorageSpecification_T::Stencil;            // Stencil_T::Q enters the PDF count per cell
+// NOTE(review): a using-directive at header scope applies to every file that includes this header.
+using namespace walberla;
+
+/// Initialises and load-balances \p setupBfs from the two config blocks, using the LDC refinement selector.
+/// With \p useMPIManager the MPI world size is the process count; otherwise the configured "numProcesses" is.
+/// Only single-process runs write "<blockForestFilestem>.bfs", the optional VTK output and the optional statistics.
+inline void createSetupBlockForest(SetupBlockForest& setupBfs,
+                                   const Config::BlockHandle& domainSetup, const Config::BlockHandle& blockForestSetup,
+                                   const bool useMPIManager=false)
+{
+   WALBERLA_LOG_INFO_ON_ROOT("Generating SetupBlockForest...")
+
+   const Vector3< real_t > domainSize    = domainSetup.getParameter< Vector3< real_t > >("domainSize");
+   const Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock");
+   const Vector3< uint_t > rootBlocks    = domainSetup.getParameter< Vector3< uint_t > >("rootBlocks");
+   const Vector3< bool > periodic        = domainSetup.getParameter< Vector3< bool > >("periodic");
+
+   const uint_t refinementDepth = blockForestSetup.getParameter< uint_t >("refinementDepth", uint_c(1));
+   const std::string blockForestFilestem = blockForestSetup.getParameter< std::string >("blockForestFilestem", "blockforest");
+   const bool writeVtk = blockForestSetup.getParameter< bool >("writeVtk", false);
+   const bool outputStatistics = blockForestSetup.getParameter< bool >("outputStatistics", false);
+
+   // The configured "numProcesses" is only read when the MPI world size is not used for load balancing.
+   uint_t numProcesses = uint_c(0);
+   if(useMPIManager) { numProcesses = uint_c(mpi::MPIManager::instance()->numProcesses()); }
+   else { numProcesses = blockForestSetup.getParameter< uint_t >("numProcesses"); }
+
+   const LDC ldc(refinementDepth);
+   auto refSelection = ldc.refinementSelector();
+   setupBfs.addRefinementSelectionFunction(std::function<void(SetupBlockForest &)>(refSelection));
+   const AABB domain(real_t(0.0), real_t(0.0), real_t(0.0), domainSize[0], domainSize[1], domainSize[2]);
+   setupBfs.addWorkloadMemorySUIDAssignmentFunction(blockforest::uniformWorkloadAndMemoryAssignment);
+   setupBfs.init(domain, rootBlocks[0], rootBlocks[1], rootBlocks[2], periodic[0], periodic[1], periodic[2]);
+   setupBfs.balanceLoad(blockforest::StaticLevelwiseCurveBalanceWeighted(), numProcesses);
+
+   // Runs with more than one MPI process stop here: no file, VTK or statistics output.
+   if(mpi::MPIManager::instance()->numProcesses() > 1)
+      return;
+
+   // Plain concatenation: this header includes <string> but not <sstream>.
+   const std::string setupBlockForestFilepath = blockForestFilestem + ".bfs";
+   setupBfs.saveToFile(setupBlockForestFilepath.c_str());
+
+   if(writeVtk) { setupBfs.writeVTKOutput(blockForestFilestem); }
+
+   if(outputStatistics){
+      WALBERLA_LOG_INFO_ON_ROOT("===========================  BLOCK FOREST STATISTICS ============================")
+      WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << setupBfs.getNumberOfBlocks())
+      for (uint_t level = 0; level <= refinementDepth; level++){
+         const uint_t numberOfBlocks = setupBfs.getNumberOfBlocks(level);
+         WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << numberOfBlocks)
+      }
+
+      const real_t avgBlocksPerProc = real_c(setupBfs.getNumberOfBlocks()) / real_c(setupBfs.getNumberOfProcesses());
+      WALBERLA_LOG_INFO_ON_ROOT("Average blocks per process: " << avgBlocksPerProc)
+      const uint_t cellsInBlock = cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2];
+      const uint_t totalNumberCells = setupBfs.getNumberOfBlocks() * cellsInBlock;
+      const real_t averageCellsPerGPU = avgBlocksPerProc * real_c(cellsInBlock);
+
+      // In-place storage counts Q PDFs per cell, otherwise 2 * Q; the factor 1e-9 turns bytes into GB.
+      const uint_t PDFsPerCell = StorageSpecification_T::inplace ? Stencil_T::Q : 2 * Stencil_T::Q;
+      const uint_t valuesPerCell = (PDFsPerCell + VelocityField_T::F_SIZE + ScalarField_T::F_SIZE);
+      const uint_t sizePerValue = sizeof(StorageSpecification_T::value_type);
+      const double expectedMemory = double_c(totalNumberCells * valuesPerCell * sizePerValue) * 1e-9;
+      const double expectedMemoryPerGPU = double_c(averageCellsPerGPU * valuesPerCell * sizePerValue) * 1e-9;
+
+      WALBERLA_LOG_INFO_ON_ROOT( "Total number of cells will be " << totalNumberCells << " fluid cells (in total on all levels)")
+      WALBERLA_LOG_INFO_ON_ROOT( "Expected total memory demand will be " << expectedMemory << " GB")
+      WALBERLA_LOG_INFO_ON_ROOT( "Average memory demand per GPU will be " << expectedMemoryPerGPU << " GB")
+      WALBERLA_LOG_INFO_ON_ROOT("=================================================================================")
+   }
+}
+
+/// Creates the BlockForest of the calling rank and stores it in \p bfs.
+/// Multi-process runs load "<blockForestFilestem>.bfs" if that file can be opened and otherwise build the setup
+/// forest on the fly; single-process runs always build it through createSetupBlockForest().
+inline void createBlockForest(shared_ptr< BlockForest >& bfs,
+                              const Config::BlockHandle& domainSetup, const Config::BlockHandle& blockForestSetup)
+{
+   const uint_t rank = uint_c(mpi::MPIManager::instance()->worldRank());
+   if (mpi::MPIManager::instance()->numProcesses() <= 1){
+      SetupBlockForest setupBfs;
+      createSetupBlockForest(setupBfs, domainSetup, blockForestSetup);
+      bfs = std::make_shared< BlockForest >(rank, setupBfs);
+      return;
+   }
+   const std::string blockForestFilestem =
+      blockForestSetup.getParameter< std::string >("blockForestFilestem", "blockforest");
+   // Plain concatenation: this header includes <string> but not <sstream>.
+   const std::string setupBlockForestFilepath = blockForestFilestem + ".bfs";
+   std::ifstream infile(setupBlockForestFilepath.c_str());
+   if (infile.good()){
+      // Load the block forest from the existing file
+      bfs = std::make_shared< BlockForest >(rank, setupBlockForestFilepath.c_str(), false);
+      return;
+   }
+   WALBERLA_LOG_WARNING_ON_ROOT("Blockforest was not created beforehand and thus needs to be created on the fly. For large simulation runs this can be a severe problem!")
+   SetupBlockForest setupBfs;
+   createSetupBlockForest(setupBfs, domainSetup, blockForestSetup, true);
+   bfs = std::make_shared< BlockForest >(rank, setupBfs);
+}
\ No newline at end of file
diff --git a/apps/benchmarks/NonUniformGridGPU/LdcSetup.h b/apps/benchmarks/NonUniformGridGPU/LdcSetup.h
index 238943a7daa9745054980e6011d46ef037ef27ec..b8431f2f2779f7f32e7a0f66db5affb422e018f3 100644
--- a/apps/benchmarks/NonUniformGridGPU/LdcSetup.h
+++ b/apps/benchmarks/NonUniformGridGPU/LdcSetup.h
@@ -31,7 +31,9 @@
 #include "field/FlagUID.h"
 
 using namespace walberla;
+
 using RefinementSelectionFunctor = SetupBlockForest::RefinementSelectionFunction;
+
 using FlagField_T          = FlagField< uint8_t >;
 
 class LDCRefinement
@@ -46,14 +48,8 @@ class LDCRefinement
    {
       const AABB & domain = forest.getDomain();
 
-      const real_t xSize = ( domain.xSize() / real_t(12) ) * real_c( 0.99 );
-      const real_t ySize = ( domain.ySize() / real_t(12) ) * real_c( 0.99 );
-
-      const AABB leftCorner( domain.xMin(), domain.yMin(), domain.zMin(),
-                             domain.xMin() + xSize, domain.yMin() + ySize, domain.zMax() );
-
-      const AABB rightCorner( domain.xMax() - xSize, domain.yMin(), domain.zMin(),
-                              domain.xMax(), domain.yMin() + ySize, domain.zMax() );
+      const AABB leftCorner( 0, domain.yMax() -1, 0, 1, domain.yMax() , domain.zMax() );
+      const AABB rightCorner( domain.xMax() - 1, domain.yMax() -1, 0, domain.xMax(), domain.yMax() , domain.zMax() );
 
       for(auto & block : forest)
       {
@@ -99,8 +95,7 @@ class LDC
             Cell globalCell(localCell);
             sbfs.transformBlockLocalToGlobalCell(globalCell, b);
             if (globalCell.y() >= cell_idx_c(sbfs.getNumberOfYCells(level))) { flagField->addFlag(localCell, ubbFlag); }
-            else if (globalCell.z() < 0 || globalCell.y() < 0 || globalCell.x() < 0 ||
-                     globalCell.x() >= cell_idx_c(sbfs.getNumberOfXCells(level)) || globalCell.z() >= cell_idx_c(sbfs.getNumberOfZCells(level)))
+            else if (globalCell.y() < 0 || globalCell.x() < 0 || globalCell.x() >= cell_idx_c(sbfs.getNumberOfXCells(level)))
             {
                flagField->addFlag(localCell, noslipFlag);
             }
diff --git a/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.cpp b/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.cpp
index 233103342cdde4e21ab443cd3aadbece48b42c85..818f612b919dc53793f2c4a4807f11c1360b9ca1 100644
--- a/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.cpp
+++ b/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.cpp
@@ -19,7 +19,6 @@
 //======================================================================================================================
 
 #include "blockforest/Initialization.h"
-#include "blockforest/loadbalancing/StaticCurve.h"
 
 #include "core/Environment.h"
 #include "core/logging/Initialization.h"
@@ -34,67 +33,55 @@
 
 #include "gpu/AddGPUFieldToStorage.h"
 #include "gpu/DeviceSelectMPI.h"
-#include "gpu/FieldCopy.h"
 #include "gpu/ErrorChecking.h"
+#include "gpu/FieldCopy.h"
 #include "gpu/HostFieldAllocator.h"
 #include "gpu/ParallelStreams.h"
 #include "gpu/communication/NonUniformGPUScheme.h"
 
-#include "lbm_generated/evaluation/PerformanceEvaluation.h"
-#include "lbm_generated/field/PdfField.h"
-#include "lbm_generated/field/AddToStorage.h"
-#include "lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.h"
-#include "lbm_generated/gpu/GPUPdfField.h"
-#include "lbm_generated/gpu/AddToStorage.h"
-#include "lbm_generated/gpu/BasicRecursiveTimeStepGPU.h"
-
 #include "python_coupling/CreateConfig.h"
+#include "python_coupling/DictWrapper.h"
 #include "python_coupling/PythonCallback.h"
 
 #include <cmath>
 
+#include "GridGeneration.h"
 #include "LdcSetup.h"
 #include "NonUniformGridGPUInfoHeader.h"
+#include "lbm_generated/evaluation/PerformanceEvaluation.h"
+#include "lbm_generated/field/AddToStorage.h"
+#include "lbm_generated/field/PdfField.h"
+#include "lbm_generated/gpu/AddToStorage.h"
+#include "lbm_generated/gpu/BasicRecursiveTimeStepGPU.h"
+#include "lbm_generated/gpu/GPUPdfField.h"
+#include "lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.h"
 using namespace walberla;
 
 using StorageSpecification_T = lbm::NonUniformGridGPUStorageSpecification;
-using Stencil_T = StorageSpecification_T::Stencil;
+using Stencil_T              = StorageSpecification_T::Stencil;
 using CommunicationStencil_T = StorageSpecification_T::CommunicationStencil;
 
-using PdfField_T = lbm_generated::PdfField< StorageSpecification_T >;
-using GPUPdfField_T = lbm_generated::GPUPdfField< StorageSpecification_T >;
-using FlagField_T = FlagField< uint8_t >;
+using PdfField_T           = lbm_generated::PdfField< StorageSpecification_T >;
+using GPUPdfField_T        = lbm_generated::GPUPdfField< StorageSpecification_T >;
+using FlagField_T          = FlagField< uint8_t >;
 using BoundaryCollection_T = lbm::NonUniformGridGPUBoundaryCollection< FlagField_T >;
 
 using SweepCollection_T = lbm::NonUniformGridGPUSweepCollection;
 
 using gpu::communication::NonUniformGPUScheme;
 
-namespace {
-void createSetupBlockForest(SetupBlockForest& setupBfs, const Config::BlockHandle& domainSetup, LDC& ldcSetup, const uint_t numProcesses=uint_c(MPIManager::instance()->numProcesses())) {
-    Vector3<real_t> domainSize = domainSetup.getParameter<Vector3<real_t> >("domainSize");
-    Vector3<uint_t> rootBlocks = domainSetup.getParameter<Vector3<uint_t> >("rootBlocks");
-    Vector3<bool> periodic = domainSetup.getParameter<Vector3<bool> >("periodic");
-
-    auto refSelection = ldcSetup.refinementSelector();
-    setupBfs.addRefinementSelectionFunction(std::function<void(SetupBlockForest &)>(refSelection));
-    const AABB domain(real_t(0.0), real_t(0.0), real_t(0.0), domainSize[0], domainSize[1], domainSize[2]);
-    setupBfs.addWorkloadMemorySUIDAssignmentFunction(blockforest::uniformWorkloadAndMemoryAssignment);
-    setupBfs.init(domain, rootBlocks[0], rootBlocks[1], rootBlocks[2], periodic[0], periodic[1], periodic[2]);
-    setupBfs.balanceLoad(blockforest::StaticLevelwiseCurveBalanceWeighted(), numProcesses);
-}
-}
-
 int main(int argc, char** argv)
 {
    const mpi::Environment env(argc, argv);
    mpi::MPIManager::instance()->useWorldComm();
    gpu::selectDeviceBasedOnMpiRank();
 
+   const std::string input_filename(argv[1]);
+   const bool inputIsPython = string_ends_with(input_filename, ".py");
+
    for (auto cfg = python_coupling::configBegin(argc, argv); cfg != python_coupling::configEnd(); ++cfg)
    {
       WALBERLA_MPI_WORLD_BARRIER()
-
       WALBERLA_GPU_CHECK(gpuPeekAtLastError())
 
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -103,68 +90,32 @@ int main(int argc, char** argv)
 
       auto config = *cfg;
       logging::configureLogging(config);
-      auto domainSetup              = config->getOneBlock("DomainSetup");
+      auto domainSetup        = config->getOneBlock("DomainSetup");
+      auto blockForestSetup   = config->getOneBlock("SetupBlockForest");
+      const bool writeSetupForestAndReturn = blockForestSetup.getParameter< bool >("writeSetupForestAndReturn", true);
+
       Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock");
       // Reading parameters
-      auto parameters          = config->getOneBlock("Parameters");
-      const real_t omega       = parameters.getParameter< real_t >("omega", real_c(1.4));
-      const uint_t refinementDepth = parameters.getParameter< uint_t >("refinementDepth", uint_c(1));
-      const uint_t timesteps   = parameters.getParameter< uint_t >("timesteps", uint_c(50));
-      const bool cudaEnabledMPI = parameters.getParameter< bool >("cudaEnabledMPI", false);
-      const bool writeSetupForestAndReturn = parameters.getParameter< bool >("writeSetupForestAndReturn", false);
-      const bool benchmarkKernelOnly = parameters.getParameter< bool >("benchmarkKernelOnly", false);
-      const uint_t numProcesses = parameters.getParameter< uint_t >( "numProcesses");
-
-      auto ldc = std::make_shared< LDC >(refinementDepth );
-      SetupBlockForest setupBfs;
-      if (writeSetupForestAndReturn)
-      {
-         WALBERLA_LOG_INFO_ON_ROOT("Creating SetupBlockForest for " << numProcesses << " processes")
-         WALBERLA_LOG_INFO_ON_ROOT("Generating SetupBlockForest...")
-         createSetupBlockForest(setupBfs, domainSetup, *ldc, numProcesses);
-
-         WALBERLA_ROOT_SECTION() { setupBfs.writeVTKOutput("SetupBlockForest"); }
+      auto parameters   = config->getOneBlock("Parameters");
+      const real_t omega             = parameters.getParameter< real_t >("omega", real_c(1.95));
+      const uint_t timesteps         = parameters.getParameter< uint_t >("timesteps", uint_c(50));
+      const bool gpuEnabledMPI       = parameters.getParameter< bool >("gpuEnabledMPI", false);
 
-         WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << setupBfs.getNumberOfBlocks())
-         uint_t totalCellUpdates( 0.0 );
-         for (uint_t level = 0; level <= refinementDepth; level++)
-         {
-            const uint_t numberOfBlocks = setupBfs.getNumberOfBlocks(level);
-            const uint_t numberOfCells = numberOfBlocks * cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2];
-            totalCellUpdates += timesteps * math::uintPow2(level)  * numberOfCells;
-            WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << numberOfBlocks)
-         }
-         cudaDeviceProp prop{};
-         WALBERLA_GPU_CHECK(gpuGetDeviceProperties(&prop, 0))
-
-         const uint_t totalNumberCells = setupBfs.getNumberOfBlocks() * cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2];
+      shared_ptr< BlockForest > bfs;
+      createBlockForest(bfs, domainSetup, blockForestSetup);
 
-         const uint_t PDFsPerCell = StorageSpecification_T::inplace ? Stencil_T::Q : 2 * Stencil_T::Q;
-         const uint_t valuesPerCell = (PDFsPerCell + VelocityField_T::F_SIZE + ScalarField_T::F_SIZE);
-         const uint_t sizePerValue = sizeof(PdfField_T::value_type);
-         const double totalGPUMem = double_c(prop.totalGlobalMem) * 1e-9;
-         const double expectedMemory = double_c(totalNumberCells * valuesPerCell * sizePerValue) * 1e-9;
-
-         WALBERLA_LOG_INFO_ON_ROOT( "Total number of cells will be " << totalNumberCells << " fluid cells (in total on all levels)")
-         WALBERLA_LOG_INFO_ON_ROOT( "Expected total memory demand will be " << expectedMemory << " GB")
-         WALBERLA_LOG_INFO_ON_ROOT( "The total cell updates after " << timesteps << " timesteps (on the coarse level) will be " << totalCellUpdates)
-         WALBERLA_LOG_INFO_ON_ROOT( "Total GPU memory " << totalGPUMem)
-
-         WALBERLA_LOG_INFO_ON_ROOT("Ending program")
+      if (writeSetupForestAndReturn && mpi::MPIManager::instance()->numProcesses() == 1)
+      {
+         WALBERLA_LOG_INFO_ON_ROOT("BlockForest has been created and writen to file. Returning program")
          return EXIT_SUCCESS;
       }
 
-      WALBERLA_LOG_INFO_ON_ROOT("Generating SetupBlockForest...")
-      createSetupBlockForest(setupBfs, domainSetup, *ldc);
-
-      // Create structured block forest
-      WALBERLA_LOG_INFO_ON_ROOT("Creating structured block forest...")
-      auto bfs    = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs);
-      auto blocks = std::make_shared< StructuredBlockForest >(bfs, cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2]);
+      auto blocks =
+         std::make_shared< StructuredBlockForest >(bfs, cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2]);
       blocks->createCellBoundingBoxes();
 
-      WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << blocks->getNumberOfBlocks())
-      for (uint_t level = 0; level <= refinementDepth; level++)
+      WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << blocks->getNumberOfBlocks() << " on " << blocks->getNumberOfLevels() << " refinement levels")
+      for (uint_t level = 0; level < blocks->getNumberOfLevels(); level++)
       {
          WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << blocks->getNumberOfBlocks(level))
       }
@@ -172,26 +123,35 @@ int main(int argc, char** argv)
       WALBERLA_LOG_INFO_ON_ROOT("Start field allocation")
       // Creating fields
       const StorageSpecification_T StorageSpec = StorageSpecification_T();
-      auto allocator = make_shared< gpu::HostFieldAllocator<real_t> >();
-      const BlockDataID pdfFieldCpuID  = lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, uint_c(2), field::fzyx, allocator);
-      const BlockDataID velFieldCpuID = field::addToStorage< VelocityField_T >(blocks, "vel", real_c(0.0), field::fzyx, uint_c(2), allocator);
-      const BlockDataID densityFieldCpuID = field::addToStorage< ScalarField_T >(blocks, "density", real_c(1.0), field::fzyx, uint_c(2), allocator);
-      const BlockDataID flagFieldID = field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field", uint_c(3));
-
-      const BlockDataID pdfFieldGpuID = lbm_generated::addGPUPdfFieldToStorage< PdfField_T >(blocks, pdfFieldCpuID, StorageSpec, "pdfs on GPU", true);
+      auto allocator                           = make_shared< gpu::HostFieldAllocator< real_t > >();
+      const BlockDataID pdfFieldCpuID =
+         lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, uint_c(2), field::fzyx, allocator);
+      const BlockDataID velFieldCpuID =
+         field::addToStorage< VelocityField_T >(blocks, "vel", real_c(0.0), field::fzyx, uint_c(2), allocator);
+      const BlockDataID densityFieldCpuID =
+         field::addToStorage< ScalarField_T >(blocks, "density", real_c(1.0), field::fzyx, uint_c(2), allocator);
+      const BlockDataID flagFieldID =
+         field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field", uint_c(3));
+
+      const BlockDataID pdfFieldGpuID =
+         lbm_generated::addGPUPdfFieldToStorage< PdfField_T >(blocks, pdfFieldCpuID, StorageSpec, "pdfs on GPU", true);
       const BlockDataID velFieldGpuID =
          gpu::addGPUFieldToStorage< VelocityField_T >(blocks, velFieldCpuID, "velocity on GPU", true);
       const BlockDataID densityFieldGpuID =
          gpu::addGPUFieldToStorage< ScalarField_T >(blocks, densityFieldCpuID, "velocity on GPU", true);
       WALBERLA_LOG_INFO_ON_ROOT("Finished field allocation")
 
-      const Cell innerOuterSplit = Cell(parameters.getParameter< Vector3<cell_idx_t> >("innerOuterSplit", Vector3<cell_idx_t>(1, 1, 1)));
-      Vector3< int32_t > gpuBlockSize = parameters.getParameter< Vector3< int32_t > >("gpuBlockSize", Vector3< int32_t >(256, 1, 1));
-      SweepCollection_T sweepCollection(blocks, pdfFieldGpuID, densityFieldGpuID, velFieldGpuID, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2], omega, innerOuterSplit);
-      for (auto& iBlock : *blocks)
-      {
-         sweepCollection.initialise(&iBlock, cell_idx_c(1), nullptr);
+      const Cell innerOuterSplit =
+         Cell(parameters.getParameter< Vector3< cell_idx_t > >("innerOuterSplit", Vector3< cell_idx_t >(1, 1, 1)));
+      Vector3< int32_t > gpuBlockSize =
+         parameters.getParameter< Vector3< int32_t > >("gpuBlockSize", Vector3< int32_t >(256, 1, 1));
+      SweepCollection_T sweepCollection(blocks, pdfFieldGpuID, densityFieldGpuID, velFieldGpuID, gpuBlockSize[0],
+                                        gpuBlockSize[1], gpuBlockSize[2], omega, innerOuterSplit);
+
+      for (auto& iBlock : *blocks){
+         sweepCollection.initialise(&iBlock, cell_idx_c(1));
       }
+      sweepCollection.initialiseBlockPointer();
       WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
       WALBERLA_GPU_CHECK(gpuPeekAtLastError())
       WALBERLA_MPI_BARRIER()
@@ -200,9 +160,11 @@ int main(int argc, char** argv)
       ///                                      LB SWEEPS AND BOUNDARY HANDLING                                       ///
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
+      auto ldc = std::make_shared< LDC >(blocks->getDepth());
+
       const FlagUID fluidFlagUID("Fluid");
       ldc->setupBoundaryFlagField(*blocks, flagFieldID);
-      geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldID, fluidFlagUID, 2);
+      geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldID, fluidFlagUID, 0);
       BoundaryCollection_T boundaryCollection(blocks, flagFieldID, pdfFieldGpuID, fluidFlagUID);
 
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -210,8 +172,8 @@ int main(int argc, char** argv)
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
       WALBERLA_LOG_INFO_ON_ROOT("Setting up communication...")
-      auto communication = std::make_shared< NonUniformGPUScheme <CommunicationStencil_T>> (blocks, cudaEnabledMPI);
-      auto packInfo = lbm_generated::setupNonuniformGPUPdfCommunication<GPUPdfField_T>(blocks, pdfFieldGpuID);
+      auto communication = std::make_shared< NonUniformGPUScheme< CommunicationStencil_T > >(blocks, gpuEnabledMPI);
+      auto packInfo      = lbm_generated::setupNonuniformGPUPdfCommunication< GPUPdfField_T >(blocks, pdfFieldGpuID);
       communication->addPackInfo(packInfo);
       WALBERLA_MPI_BARRIER()
 
@@ -224,28 +186,31 @@ int main(int argc, char** argv)
       sweepCollection.setOuterPriority(streamHighPriority);
       auto defaultStream = gpu::StreamRAII::newPriorityStream(streamLowPriority);
 
-      lbm_generated::BasicRecursiveTimeStepGPU< GPUPdfField_T, SweepCollection_T, BoundaryCollection_T > LBMMeshRefinement(blocks, pdfFieldGpuID, sweepCollection, boundaryCollection, communication, packInfo);
+      lbm_generated::BasicRecursiveTimeStepGPU< GPUPdfField_T, SweepCollection_T, BoundaryCollection_T >
+         LBMMeshRefinement(blocks, pdfFieldGpuID, sweepCollection, boundaryCollection, communication, packInfo);
       SweepTimeloop timeLoop(blocks->getBlockStorage(), timesteps);
 
-      // LBMMeshRefinement.test(5);
-      // return EXIT_SUCCESS;
-
-      if(benchmarkKernelOnly){
-         timeLoop.add() << Sweep(sweepCollection.streamCollide(SweepCollection_T::ALL), "LBM StreamCollide");
-      }
-      else{
-         LBMMeshRefinement.addRefinementToTimeLoop(timeLoop);
-      }
+      LBMMeshRefinement.addRefinementToTimeLoop(timeLoop, uint_c(0));
 
       // VTK
       const uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0);
-      if (vtkWriteFrequency > 0)
-      {
-         auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out",
-                                                         "simulation_step", false, true, true, false, 0);
+      const bool useVTKAMRWriter     = parameters.getParameter< bool >("useVTKAMRWriter", false);
+      const bool oneFilePerProcess   = parameters.getParameter< bool >("oneFilePerProcess", false);
+
+      auto finalDomain = blocks->getDomain();
+      if (vtkWriteFrequency > 0){
+         auto vtkOutput =
+            vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out", "simulation_step",
+                                           false, true, true, false, 0, useVTKAMRWriter, oneFilePerProcess);
          auto velWriter = make_shared< field::VTKWriter< VelocityField_T, float32 > >(velFieldCpuID, "vel");
          vtkOutput->addCellDataWriter(velWriter);
 
+         if (parameters.getParameter< bool >("writeOnlySlice", true)){
+            const AABB sliceXY(finalDomain.xMin(), finalDomain.yMin(), finalDomain.center()[2] - blocks->dz(blocks->getDepth()),
+                               finalDomain.xMax(), finalDomain.yMax(), finalDomain.center()[2] + blocks->dz(blocks->getDepth()));
+            vtkOutput->addCellInclusionFilter(vtk::AABBCellFilter(sliceXY));
+         }
+
          vtkOutput->addBeforeFunction([&]() {
             for (auto& block : *blocks)
                sweepCollection.calculateMacroscopicParameters(&block);
@@ -260,17 +225,17 @@ int main(int argc, char** argv)
 
       auto remainingTimeLoggerFrequency =
          parameters.getParameter< real_t >("remainingTimeLoggerFrequency", real_c(-1.0)); // in seconds
-      if (remainingTimeLoggerFrequency > 0)
-      {
+      if (remainingTimeLoggerFrequency > 0){
          auto logger = timing::RemainingTimeLogger(timeLoop.getNrOfTimeSteps(), remainingTimeLoggerFrequency);
          timeLoop.addFuncAfterTimeStep(logger, "remaining time logger");
       }
 
-      lbm_generated::PerformanceEvaluation<FlagField_T> const performance(blocks, flagFieldID, fluidFlagUID);
-      field::CellCounter< FlagField_T > fluidCells( blocks, flagFieldID, fluidFlagUID );
+      lbm_generated::PerformanceEvaluation< FlagField_T > const performance(blocks, flagFieldID, fluidFlagUID);
+      field::CellCounter< FlagField_T > fluidCells(blocks, flagFieldID, fluidFlagUID);
       fluidCells();
 
-      WALBERLA_LOG_INFO_ON_ROOT( "Non uniform Grid benchmark with " << fluidCells.numberOfCells() << " fluid cells (in total on all levels)")
+      WALBERLA_LOG_INFO_ON_ROOT("Non uniform Grid benchmark with " << fluidCells.numberOfCells()
+                                                                   << " fluid cells (in total on all levels)")
 
       WcTimingPool timeloopTiming;
       WcTimer simTimer;
@@ -294,6 +259,32 @@ int main(int argc, char** argv)
 
       const auto reducedTimeloopTiming = timeloopTiming.getReduced();
       WALBERLA_LOG_RESULT_ON_ROOT("Time loop timing:\n" << *reducedTimeloopTiming)
+
+      WALBERLA_ROOT_SECTION()
+      {
+         if (inputIsPython)
+         {
+            python_coupling::PythonCallback pythonCallbackResults("results_callback");
+            if (pythonCallbackResults.isCallable())
+            {
+               pythonCallbackResults.data().exposeValue("numProcesses", lbm_generated::PerformanceEvaluation< FlagField_T >::processes());
+               pythonCallbackResults.data().exposeValue("numThreads", performance.threads());
+               pythonCallbackResults.data().exposeValue("numCores", performance.cores());
+               pythonCallbackResults.data().exposeValue("numberOfCells", performance.numberOfCells());
+               pythonCallbackResults.data().exposeValue("numberOfFluidCells", performance.numberOfFluidCells());
+               pythonCallbackResults.data().exposeValue("mlups", performance.mlups(timesteps, time));
+               pythonCallbackResults.data().exposeValue("mlupsPerCore", performance.mlupsPerCore(timesteps, time));
+               pythonCallbackResults.data().exposeValue("mlupsPerProcess", performance.mlupsPerProcess(timesteps, time));
+               pythonCallbackResults.data().exposeValue("stencil", infoStencil);
+               pythonCallbackResults.data().exposeValue("streamingPattern", infoStreamingPattern);
+               pythonCallbackResults.data().exposeValue("collisionSetup", infoCollisionSetup);
+               pythonCallbackResults.data().exposeValue("cse_global", infoCseGlobal);
+               pythonCallbackResults.data().exposeValue("cse_pdfs", infoCsePdfs);
+               // Call Python function to report results
+               pythonCallbackResults();
+            }
+         }
+      }
    }
    return EXIT_SUCCESS;
 }
\ No newline at end of file
diff --git a/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.py b/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.py
index d523b5c0c1b8dfcbfa1cf112c0342edfdee03c7d..a1f5bff2369cc72b8a45e10e7252523f4d739690 100644
--- a/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.py
+++ b/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.py
@@ -7,7 +7,7 @@ from pystencils.typing import TypedSymbol
 from lbmpy.advanced_streaming.utility import get_timesteps
 from lbmpy.boundaries import NoSlip, UBB
 from lbmpy.creationfunctions import create_lb_method, create_lb_collision_rule
-from lbmpy import LBMConfig, LBMOptimisation, Stencil, Method, LBStencil
+from lbmpy import LBMConfig, LBMOptimisation, Stencil, Method, LBStencil, SubgridScaleModel
 
 from pystencils_walberla import CodeGeneration, generate_info_header
 from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator
@@ -30,20 +30,25 @@ const char * infoCollisionSetup = "{collision_setup}";
 const bool infoCseGlobal = {cse_global};
 const bool infoCsePdfs = {cse_pdfs};
 """
- 
+
 with CodeGeneration() as ctx:
     field_type = "float64" if ctx.double_accuracy else "float32"
 
-    streaming_pattern = 'pull'
+    streaming_pattern = 'esopull'
     timesteps = get_timesteps(streaming_pattern)
     stencil = LBStencil(Stencil.D3Q19)
+    method_enum = Method.CUMULANT
+
+    fourth_order_correction = 0.01 if method_enum == Method.CUMULANT and stencil.Q == 27 else False
+    collision_setup = "cumulant-K17" if fourth_order_correction else method_enum.name.lower()
 
     assert stencil.D == 3, "This application supports only three-dimensional stencils"
     pdfs, pdfs_tmp = ps.fields(f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {field_type}[3D]", layout='fzyx')
     density_field, velocity_field = ps.fields(f"density, velocity(3) : {field_type}[3D]", layout='fzyx')
     macroscopic_fields = {'density': density_field, 'velocity': velocity_field}
 
-    lbm_config = LBMConfig(stencil=stencil, method=Method.SRT, relaxation_rate=omega,
+    lbm_config = LBMConfig(stencil=stencil, method=method_enum, relaxation_rate=omega, compressible=True,
+                           fourth_order_correction=fourth_order_correction,
                            streaming_pattern=streaming_pattern)
     lbm_opt = LBMOptimisation(cse_global=False, field_layout='fzyx')
 
@@ -66,7 +71,7 @@ with CodeGeneration() as ctx:
     infoHeaderParams = {
         'stencil': stencil.name.lower(),
         'streaming_pattern': streaming_pattern,
-        'collision_setup': lbm_config.method.name.lower(),
+        'collision_setup': collision_setup,
         'cse_global': int(lbm_opt.cse_global),
         'cse_pdfs': int(lbm_opt.cse_pdfs),
     }
diff --git a/apps/benchmarks/NonUniformGridGPU/simulation_setup/benchmark_configs.py b/apps/benchmarks/NonUniformGridGPU/simulation_setup/benchmark_configs.py
index 34bc6caa92b92d5239c9ca1409660b062247a469..6f7b6820a911fc7805352a07bd2af359dd4b6a7b 100644
--- a/apps/benchmarks/NonUniformGridGPU/simulation_setup/benchmark_configs.py
+++ b/apps/benchmarks/NonUniformGridGPU/simulation_setup/benchmark_configs.py
@@ -1,16 +1,58 @@
 import waLBerla as wlb
+from waLBerla.tools.config import block_decomposition
+from waLBerla.tools.sqlitedb import sequenceValuesToScalars, checkAndUpdateSchema, storeSingle
+import sqlite3
+import os
+import sys
+
+try:
+    import machinestate as ms
+except ImportError:
+    ms = None  # optional module: results_callback then skips the "MachineState" entry
+
+DB_FILE = os.environ.get('DB_FILE', "gpu_benchmark.sqlite3")  # used when Scenario gets db_file_name=None
+BENCHMARK = int(os.environ.get('BENCHMARK', 0))  # 0: validation run, 1: weak scaling, 2: strong scaling
+
+WeakX = int(os.environ.get('WeakX', 128))  # cells per block (x, y, z) used by weak_scaling_ldc
+WeakY = int(os.environ.get('WeakY', 128))
+WeakZ = int(os.environ.get('WeakZ', 128))
+
+StrongX = int(os.environ.get('StrongX', 128))  # cells per block (x, y, z) used by strong_scaling_ldc,
+StrongY = int(os.environ.get('StrongY', 128))  # which divides them further in its non-uniform case
+StrongZ = int(os.environ.get('StrongZ', 128))
 
 
 class Scenario:
-    def __init__(self, domain_size=(64, 64, 64), root_blocks=(2, 2, 2),
-                 cells_per_block=(32, 32, 32), refinement_depth=0):
+    def __init__(self,
+                 domain_size=(64, 64, 64),
+                 root_blocks=(2, 2, 2),
+                 num_processes=1,
+                 refinement_depth=0,
+                 cells_per_block=(32, 32, 32),
+                 timesteps=101,
+                 gpu_enabled_mpi=False,
+                 vtk_write_frequency=0,
+                 logger_frequency=30,
+                 blockforest_filestem="blockforest",
+                 write_setup_vtk=True,
+                 db_file_name=None):
 
         self.domain_size = domain_size
         self.root_blocks = root_blocks
         self.cells_per_block = cells_per_block
+        self.periodic = (0, 0, 1)
+
         self.refinement_depth = refinement_depth
+        self.num_processes = num_processes
+        self.bfs_filestem = blockforest_filestem
+        self.write_setup_vtk = write_setup_vtk
+
+        self.timesteps = timesteps
+        self.gpu_enabled_mpi = gpu_enabled_mpi
+        self.vtk_write_frequency = vtk_write_frequency
+        self.logger_frequency = logger_frequency
 
-        self.periodic = (0, 0, 0)
+        self.db_file_name = DB_FILE if db_file_name is None else db_file_name
 
         self.config_dict = self.config(print_dict=False)
 
@@ -22,39 +64,79 @@ class Scenario:
                 'domainSize': self.domain_size,
                 'rootBlocks': self.root_blocks,
                 'cellsPerBlock': self.cells_per_block,
-                'periodic': self.periodic
+                'periodic': self.periodic,
+            },
+            'SetupBlockForest': {
+                'refinementDepth': self.refinement_depth,
+                'numProcesses': self.num_processes,
+                'blockForestFilestem': self.bfs_filestem,
+                'writeVtk': self.write_setup_vtk,
+                'outputStatistics': True,
+                'writeSetupForestAndReturn': True,
             },
             'Parameters': {
                 'omega': 1.95,
-                'timesteps': 30001,
-
-                'refinementDepth': self.refinement_depth,
-                'writeSetupForestAndReturn': False,
-                'numProcesses': 1,
-
-                'cudaEnabledMPI': False,
-                'benchmarkKernelOnly': False,
-
-                'remainingTimeLoggerFrequency': 3,
-
-                'vtkWriteFrequency': 10000,
+                'timesteps': self.timesteps,
+                'remainingTimeLoggerFrequency': self.logger_frequency,
+                'vtkWriteFrequency': self.vtk_write_frequency,
+                'useVTKAMRWriter': True,
+                'oneFilePerProcess': False,
+                'writeOnlySlice': False,
+                'gpuEnabledMPI': self.gpu_enabled_mpi,
+                'gpuBlockSize': (128, 1, 1),
             },
             'Logging': {
                 'logLevel': "info",
             }
         }
 
-        if print_dict and config_dict["Parameters"]["writeSetupForestAndReturn"] is False:
+        if print_dict:
             wlb.log_info_on_root("Scenario:\n" + pformat(config_dict))
+
         return config_dict
 
+    @wlb.member_callback
+    def results_callback(self, **kwargs):
+        """Store the scenario configuration and the reported results (kwargs) in the sqlite database."""
+        data = {}
+        data.update(self.config_dict['Parameters'])
+        data.update(self.config_dict['DomainSetup'])
+        data.update(kwargs)
+
+        data['executable'] = sys.argv[0]
+        data['compile_flags'] = wlb.build_info.compiler_flags
+        data['walberla_version'] = wlb.build_info.version
+        data['build_machine'] = wlb.build_info.build_machine
+
+        if ms:
+            state = ms.MachineState(extended=False, anonymous=True)
+            state.generate()                        # generate subclasses
+            state.update()                          # read information
+            data["MachineState"] = str(state.get())
+        else:
+            print("MachineState module is not available. MachineState was not saved")
+
+        sequenceValuesToScalars(data)
+        num_tries = 4
+        table_name = "runs"
+        # check multiple times e.g. may fail when multiple benchmark processes are running
+        for num_try in range(num_tries):
+            try:
+                checkAndUpdateSchema(data, table_name, self.db_file_name)
+                storeSingle(data, table_name, self.db_file_name)
+                break
+            except sqlite3.OperationalError as e:
+                wlb.log_warning(f"Sqlite DB writing failed: try {num_try + 1}/{num_tries}  {str(e)}")
+        else:
+            wlb.log_warning(f"Giving up after {num_tries} tries: results were NOT stored in {self.db_file_name}")
+
 
 def validation_run():
     """Run with full periodic shear flow or boundary scenario (ldc) to check if the code works"""
     wlb.log_info_on_root("Validation run")
 
-    domain_size = (96, 96, 96)
-    cells_per_block = (32, 32, 32)
+    domain_size = (192, 192, 64)
+    cells_per_block = (64, 64, 64)
 
     root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)])
 
@@ -62,8 +144,91 @@ def validation_run():
     scenario = Scenario(domain_size=domain_size,
                         root_blocks=root_blocks,
                         cells_per_block=cells_per_block,
-                        refinement_depth=1)
+                        timesteps=0,
+                        vtk_write_frequency=0,
+                        refinement_depth=3,
+                        gpu_enabled_mpi=False)
     scenarios.add(scenario)
 
 
-validation_run()
+def weak_scaling_ldc(num_proc, gpu_enabled_mpi=False, uniform=True):
+    """Register the weak scaling scenario; the domain grows in z with the number of processes."""
+    wlb.log_info_on_root("Running weak scaling benchmark...")
+
+    # inside an MPI run with several ranks the actual process count replaces the argument
+    mpi_procs = wlb.mpi.numProcesses()
+    if mpi_procs > 1:
+        num_proc = mpi_procs
+
+    # the non-uniform setup is only defined for multiples of 16 processes
+    if not uniform and num_proc % 16 != 0:
+        raise RuntimeError("Number of processes must be dividable by 16")
+
+    name = "uniform" if uniform else "nonuniform"
+    blocks_in_z = 3 * num_proc if uniform else int(num_proc // 16)
+
+    cells_per_block = (WeakX, WeakY, WeakZ)
+    block_x, block_y, block_z = cells_per_block
+    domain_size = (block_x * 3, block_y * 3, block_z * blocks_in_z)
+    root_blocks = tuple(total // per_block for total, per_block in zip(domain_size, cells_per_block))
+
+    manager = wlb.ScenarioManager()
+    scenario = Scenario(blockforest_filestem=f"blockforest_{name}_{num_proc}",
+                        domain_size=domain_size,
+                        root_blocks=root_blocks,
+                        num_processes=num_proc,
+                        cells_per_block=cells_per_block,
+                        refinement_depth=0 if uniform else 3,
+                        timesteps=10,
+                        gpu_enabled_mpi=gpu_enabled_mpi,
+                        db_file_name=f"weakScalingGPU{name}LDC.sqlite3")
+    manager.add(scenario)
+
+
+def strong_scaling_ldc(num_proc, gpu_enabled_mpi=False, uniform=True):
+    """Register the strong scaling scenario; raises RuntimeError unless num_proc is a multiple of 64."""
+    wlb.log_info_on_root("Running strong scaling benchmark...")
+
+    # This benchmark must run from 64 GPUs onwards
+    if wlb.mpi.numProcesses() > 1:
+        num_proc = wlb.mpi.numProcesses()
+
+    if num_proc % 64 != 0:
+        raise RuntimeError("Number of processes must be dividable by 64")
+
+    cells_per_block = (StrongX, StrongY, StrongZ)
+
+    if uniform:
+        domain_size = (cells_per_block[0] * 2, cells_per_block[1] * 2, cells_per_block[2] * 16)
+        name = "uniform"
+    else:
+        factor = num_proc // 64  # exact: num_proc was checked to be a multiple of 64
+        blocks64 = block_decomposition(factor)
+        cells_per_block = tuple([c // b for c, b in zip(cells_per_block, reversed(blocks64))])
+        domain_size = (cells_per_block[0] * 3, cells_per_block[1] * 3, cells_per_block[2] * factor)
+        name = "nonuniform"
+
+    root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)])
+
+    scenarios = wlb.ScenarioManager()
+    scenario = Scenario(blockforest_filestem=f"blockforest_{name}_{num_proc}",
+                        domain_size=domain_size,
+                        root_blocks=root_blocks,
+                        num_processes=num_proc,
+                        cells_per_block=cells_per_block,
+                        refinement_depth=0 if uniform else 3,
+                        timesteps=10,
+                        gpu_enabled_mpi=gpu_enabled_mpi,
+                        db_file_name=f"strongScalingGPU{name}LDC.sqlite3")
+    scenarios.add(scenario)
+
+
+# The BENCHMARK environment variable selects which scenario is registered:
+# 0: validation run, 1: non-uniform weak scaling, 2: non-uniform strong scaling (both with GPU enabled MPI)
+if BENCHMARK == 0:
+    validation_run()
+elif BENCHMARK == 1:
+    weak_scaling_ldc(1, True, False)
+elif BENCHMARK == 2:
+    strong_scaling_ldc(1, True, False)
+else:
+    print(f"Invalid benchmark case {BENCHMARK}")
diff --git a/apps/benchmarks/PhaseFieldAllenCahn/CMakeLists.txt b/apps/benchmarks/PhaseFieldAllenCahn/CMakeLists.txt
index 1b530d61a14ca8d84cbc1f3d9c28ea873258f7a0..bb199b8fb23a5bfd3eb0cd29f73a504f00fbe1ff 100644
--- a/apps/benchmarks/PhaseFieldAllenCahn/CMakeLists.txt
+++ b/apps/benchmarks/PhaseFieldAllenCahn/CMakeLists.txt
@@ -15,10 +15,10 @@ waLBerla_generate_target_from_python(NAME BenchmarkPhaseFieldCodeGen
 if (WALBERLA_BUILD_WITH_GPU_SUPPORT )
     waLBerla_add_executable(NAME benchmark_multiphase
             FILES benchmark_multiphase.cpp InitializerFunctions.cpp multiphase_codegen.py
-            DEPENDS blockforest core gpu field postprocessing python_coupling lbm geometry timeloop gui BenchmarkPhaseFieldCodeGen)
+            DEPENDS blockforest core gpu field postprocessing python_coupling lbm_generated geometry timeloop gui BenchmarkPhaseFieldCodeGen)
 else ()
     waLBerla_add_executable(NAME benchmark_multiphase
             FILES benchmark_multiphase.cpp InitializerFunctions.cpp multiphase_codegen.py
-            DEPENDS blockforest core field postprocessing python_coupling lbm geometry timeloop gui BenchmarkPhaseFieldCodeGen)
+            DEPENDS blockforest core field postprocessing python_coupling lbm_generated geometry timeloop gui BenchmarkPhaseFieldCodeGen)
 endif (WALBERLA_BUILD_WITH_GPU_SUPPORT )
 
diff --git a/apps/benchmarks/PhaseFieldAllenCahn/benchmark.py b/apps/benchmarks/PhaseFieldAllenCahn/benchmark.py
index 88a410c1062a0699ac93058104d6dcc46f51a5bf..a703e9dfb053a17587dad241d35d7fcdddb0ba3b 100755
--- a/apps/benchmarks/PhaseFieldAllenCahn/benchmark.py
+++ b/apps/benchmarks/PhaseFieldAllenCahn/benchmark.py
@@ -8,6 +8,11 @@ from waLBerla.tools.config import block_decomposition
 import sys
 from math import prod
 
+try:
+    import machinestate as ms
+except ImportError:
+    ms = None
+
 
 def domain_block_size_ok(block_size, total_mem, gls=1, q_phase=15, q_hydro=27, size_per_value=8):
     """Checks if a single block of given size fits into GPU memory"""
@@ -20,7 +25,9 @@ def domain_block_size_ok(block_size, total_mem, gls=1, q_phase=15, q_hydro=27, s
 
 
 class Scenario:
-    def __init__(self, time_step_strategy, cuda_block_size, cells_per_block=(256, 256, 256),
+    def __init__(self, time_step_strategy,
+                 cuda_block_size,
+                 cells_per_block=(256, 256, 256),
                  cuda_enabled_mpi=False):
         # output frequencies
         self.vtkWriteFrequency = 0
@@ -89,6 +96,14 @@ class Scenario:
         data['compile_flags'] = wlb.build_info.compiler_flags
         data['walberla_version'] = wlb.build_info.version
         data['build_machine'] = wlb.build_info.build_machine
+        if ms:
+            state = ms.MachineState(extended=False, anonymous=True)
+            state.generate()                        # generate subclasses
+            state.update()                          # read information
+            data["MachineState"] = str(state.get())
+        else:
+            print("MachineState module is not available. MachineState was not saved")
+
         sequenceValuesToScalars(data)
 
         df = pd.DataFrame.from_records([data])
@@ -101,43 +116,19 @@ class Scenario:
 def benchmark():
     scenarios = wlb.ScenarioManager()
 
-    gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 8))
+    gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 40))
     gpu_mem = gpu_mem_gb * (2 ** 30)
 
-    block_size = (256, 256, 256)
+    block_size = (320, 320, 320)
+    cuda_enabled_mpi = True
 
     if not domain_block_size_ok(block_size, gpu_mem):
         wlb.log_info_on_root(f"Block size {block_size} would exceed GPU memory. Skipping.")
     else:
-        scenarios.add(Scenario(time_step_strategy='normal', cuda_block_size=(256, 1, 1), cells_per_block=block_size))
-
+        scenarios.add(Scenario(time_step_strategy='normal',
+                               cuda_block_size=(128, 1, 1),
+                               cells_per_block=block_size,
+                               cuda_enabled_mpi=cuda_enabled_mpi))
 
-def kernel_benchmark():
-    scenarios = wlb.ScenarioManager()
-
-    gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 8))
-    gpu_mem = gpu_mem_gb * (2 ** 30)
 
-    block_sizes = [(i, i, i) for i in (32, 64, 128, 256, 320, 384, 448, 512)]
-
-    cuda_blocks = [(32, 1, 1), (64, 1, 1), (128, 1, 1), (256, 1, 1),
-                   (32, 2, 1), (64, 2, 1), (128, 2, 1),
-                   (32, 4, 1), (64, 4, 1),
-                   (32, 4, 2),
-                   (32, 8, 1),
-                   (16, 16, 1)]
-
-    for time_step_strategy in ['phase_only', 'hydro_only', 'kernel_only', 'normal']:
-        for cuda_block in cuda_blocks:
-            for block_size in block_sizes:
-                if not domain_block_size_ok(block_size, gpu_mem):
-                    wlb.log_info_on_root(f"Block size {block_size} would exceed GPU memory. Skipping.")
-                    continue
-                scenario = Scenario(time_step_strategy=time_step_strategy,
-                                    cuda_block_size=cuda_block,
-                                    cells_per_block=block_size)
-                scenarios.add(scenario)
-
-
-# benchmark()
-kernel_benchmark()
+benchmark()
diff --git a/apps/benchmarks/PhaseFieldAllenCahn/benchmark_multiphase.cpp b/apps/benchmarks/PhaseFieldAllenCahn/benchmark_multiphase.cpp
index 2a59e6be99b49942a169d1c24921a93cbe8abd1e..fac902a068facfdd73b3adec6dd31194eca8acb4 100644
--- a/apps/benchmarks/PhaseFieldAllenCahn/benchmark_multiphase.cpp
+++ b/apps/benchmarks/PhaseFieldAllenCahn/benchmark_multiphase.cpp
@@ -29,6 +29,7 @@
 #include "field/vtk/VTKWriter.h"
 
 #include "geometry/InitBoundaryHandling.h"
+#include "lbm_generated/evaluation/PerformanceEvaluation.h"
 
 #include "python_coupling/CreateConfig.h"
 #include "python_coupling/DictWrapper.h"
@@ -78,14 +79,10 @@ int main(int argc, char** argv)
       logging::configureLogging(config);
       shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGridFromConfig(config);
 
-      Vector3< uint_t > cellsPerBlock =
-         config->getBlock("DomainSetup").getParameter< Vector3< uint_t > >("cellsPerBlock");
       // Reading parameters
       auto parameters                    = config->getOneBlock("Parameters");
       const std::string timeStepStrategy = parameters.getParameter< std::string >("timeStepStrategy", "normal");
       const uint_t timesteps             = parameters.getParameter< uint_t >("timesteps", uint_c(50));
-      const real_t remainingTimeLoggerFrequency =
-         parameters.getParameter< real_t >("remainingTimeLoggerFrequency", real_c(3.0));
       const uint_t scenario = parameters.getParameter< uint_t >("scenario", uint_c(1));
       const uint_t warmupSteps  = parameters.getParameter< uint_t >("warmupSteps", uint_t(2));
 
@@ -102,6 +99,7 @@ int main(int argc, char** argv)
          gpu::addGPUFieldToStorage< VelocityField_T >(blocks, vel_field, "velocity field on GPU", true);
       BlockDataID phase_field_gpu =
          gpu::addGPUFieldToStorage< PhaseField_T >(blocks, phase_field, "phase field on GPU", true);
+      BlockDataID phase_field_tmp = gpu::addGPUFieldToStorage< PhaseField_T >(blocks, phase_field, "temporary phasefield", true);
 #else
       BlockDataID lb_phase_field =
          field::addToStorage< PdfField_phase_T >(blocks, "lb phase field", real_c(0.0), field::fzyx);
@@ -109,6 +107,7 @@ int main(int argc, char** argv)
          field::addToStorage< PdfField_hydro_T >(blocks, "lb velocity field", real_c(0.0), field::fzyx);
       BlockDataID vel_field   = field::addToStorage< VelocityField_T >(blocks, "vel", real_c(0.0), field::fzyx);
       BlockDataID phase_field = field::addToStorage< PhaseField_T >(blocks, "phase", real_c(0.0), field::fzyx);
+      BlockDataID phase_field_tmp = field::addToStorage< PhaseField_T >(blocks, "phase tmp", real_c(0.0), field::fzyx);
 #endif
 
       if (timeStepStrategy != "phase_only" && timeStepStrategy != "hydro_only" && timeStepStrategy != "kernel_only")
@@ -139,47 +138,80 @@ int main(int argc, char** argv)
       pystencils::initialize_velocity_based_distributions init_g(lb_velocity_field_gpu, vel_field_gpu);
 
       pystencils::phase_field_LB_step phase_field_LB_step(
-         lb_phase_field_gpu, phase_field_gpu, vel_field_gpu, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2]);
+         lb_phase_field_gpu, phase_field_gpu, phase_field_tmp, vel_field_gpu, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2]);
       pystencils::hydro_LB_step hydro_LB_step(lb_velocity_field_gpu, phase_field_gpu, vel_field_gpu, gpuBlockSize[0],
                                               gpuBlockSize[1], gpuBlockSize[2]);
 #else
       pystencils::initialize_phase_field_distributions init_h(lb_phase_field, phase_field, vel_field);
       pystencils::initialize_velocity_based_distributions init_g(lb_velocity_field, vel_field);
-      pystencils::phase_field_LB_step phase_field_LB_step(lb_phase_field, phase_field, vel_field);
+      pystencils::phase_field_LB_step phase_field_LB_step(lb_phase_field, phase_field, phase_field_tmp, vel_field);
       pystencils::hydro_LB_step hydro_LB_step(lb_velocity_field, phase_field, vel_field);
 #endif
 
 // add communication
 #if defined(WALBERLA_BUILD_WITH_CUDA)
-      const bool cudaEnabledMpi = parameters.getParameter< bool >("cudaEnabledMpi", false);
-      auto Comm_velocity_based_distributions =
-         make_shared< gpu::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, cudaEnabledMpi);
-      auto generatedPackInfo_velocity_based_distributions =
-         make_shared< lbm::PackInfo_velocity_based_distributions >(lb_velocity_field_gpu);
-      Comm_velocity_based_distributions->addPackInfo(generatedPackInfo_velocity_based_distributions);
+      const bool gpuEnabledMpi = parameters.getParameter< bool >("cudaEnabledMpi", false);
+      const int streamLowPriority  = 0;
+      const int streamHighPriority = 0;
+      auto defaultStream     = gpu::StreamRAII::newPriorityStream(streamLowPriority);
+      auto innerOuterStreams = gpu::ParallelStreams(streamHighPriority);
+
+      auto generatedPackInfo_phase_field_distributions = make_shared< lbm::PackInfo_phase_field_distributions>(lb_phase_field_gpu);
+      auto generatedPackInfo_velocity_based_distributions = make_shared< lbm::PackInfo_velocity_based_distributions >(lb_velocity_field_gpu);
       auto generatedPackInfo_phase_field = make_shared< pystencils::PackInfo_phase_field >(phase_field_gpu);
-      Comm_velocity_based_distributions->addPackInfo(generatedPackInfo_phase_field);
 
-      auto Comm_phase_field_distributions =
-         make_shared< gpu::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, cudaEnabledMpi);
-      auto generatedPackInfo_phase_field_distributions =
-         make_shared< lbm::PackInfo_phase_field_distributions >(lb_phase_field_gpu);
-      Comm_phase_field_distributions->addPackInfo(generatedPackInfo_phase_field_distributions);
-#else
+      auto UniformGPUSchemeVelocityBasedDistributions = make_shared< gpu::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, gpuEnabledMpi, false);
+      auto UniformGPUSchemePhaseFieldDistributions = make_shared< gpu::communication::UniformGPUScheme< Full_Stencil_T > >(blocks, gpuEnabledMpi, false);
+      auto UniformGPUSchemePhaseField = make_shared< gpu::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, gpuEnabledMpi, false, 65432);
+
+      UniformGPUSchemeVelocityBasedDistributions->addPackInfo(generatedPackInfo_velocity_based_distributions);
+      UniformGPUSchemePhaseFieldDistributions->addPackInfo(generatedPackInfo_phase_field_distributions);
+      UniformGPUSchemePhaseField->addPackInfo(generatedPackInfo_phase_field);
+
+      auto Comm_velocity_based_distributions_start = std::function< void() >([&]() { UniformGPUSchemeVelocityBasedDistributions->startCommunication(); });
+      auto Comm_velocity_based_distributions_wait = std::function< void() >([&]() { UniformGPUSchemeVelocityBasedDistributions->wait(); });
 
-      blockforest::communication::UniformBufferedScheme< Stencil_hydro_T > Comm_velocity_based_distributions(blocks);
+      auto Comm_phase_field_distributions_start = std::function< void() >([&]() { UniformGPUSchemePhaseFieldDistributions->startCommunication(); });
+      auto Comm_phase_field_distributions_wait = std::function< void() >([&]() { UniformGPUSchemePhaseFieldDistributions->wait(); });
 
+      auto Comm_phase_field = std::function< void() >([&]() { UniformGPUSchemePhaseField->communicate(); });
+
+      auto swapPhaseField = std::function< void(IBlock *) >([&](IBlock * b)
+        {
+           auto phaseField    = b->getData< gpu::GPUField<real_t> >(phase_field_gpu);
+           auto phaseFieldTMP = b->getData< gpu::GPUField<real_t> >(phase_field_tmp);
+           phaseField->swapDataPointers(phaseFieldTMP);
+        });
+
+#else
+
+      auto generatedPackInfo_phase_field_distributions = make_shared< lbm::PackInfo_phase_field_distributions>(lb_phase_field);
+      auto generatedPackInfo_velocity_based_distributions = make_shared< lbm::PackInfo_velocity_based_distributions >(lb_velocity_field);
       auto generatedPackInfo_phase_field = make_shared< pystencils::PackInfo_phase_field >(phase_field);
-      auto generatedPackInfo_velocity_based_distributions =
-         make_shared< lbm::PackInfo_velocity_based_distributions >(lb_velocity_field);
 
-      Comm_velocity_based_distributions.addPackInfo(generatedPackInfo_phase_field);
-      Comm_velocity_based_distributions.addPackInfo(generatedPackInfo_velocity_based_distributions);
+      auto UniformGPUSchemeVelocityBasedDistributions = make_shared< blockforest::communication::UniformBufferedScheme< Full_Stencil_T > >(blocks);
+      auto UniformGPUSchemePhaseFieldDistributions = make_shared< blockforest::communication::UniformBufferedScheme< Full_Stencil_T > >(blocks);
+      auto UniformGPUSchemePhaseField = make_shared< blockforest::communication::UniformBufferedScheme< Full_Stencil_T > >(blocks, 65432);
+
+      UniformGPUSchemeVelocityBasedDistributions->addPackInfo(generatedPackInfo_velocity_based_distributions);
+      UniformGPUSchemePhaseFieldDistributions->addPackInfo(generatedPackInfo_phase_field_distributions);
+      UniformGPUSchemePhaseField->addPackInfo(generatedPackInfo_phase_field);
 
-      blockforest::communication::UniformBufferedScheme< Stencil_hydro_T > Comm_phase_field_distributions(blocks);
-      auto generatedPackInfo_phase_field_distributions =
-         make_shared< lbm::PackInfo_phase_field_distributions >(lb_phase_field);
-      Comm_phase_field_distributions.addPackInfo(generatedPackInfo_phase_field_distributions);
+      auto Comm_velocity_based_distributions_start = std::function< void() >([&]() { UniformGPUSchemeVelocityBasedDistributions->startCommunication(); });
+      auto Comm_velocity_based_distributions_wait = std::function< void() >([&]() { UniformGPUSchemeVelocityBasedDistributions->wait(); });
+
+      auto Comm_phase_field_distributions = std::function< void() >([&]() { UniformGPUSchemePhaseFieldDistributions->communicate(); });
+      auto Comm_phase_field_distributions_start = std::function< void() >([&]() { UniformGPUSchemePhaseFieldDistributions->startCommunication(); });
+      auto Comm_phase_field_distributions_wait = std::function< void() >([&]() { UniformGPUSchemePhaseFieldDistributions->wait(); });
+
+      auto Comm_phase_field = std::function< void() >([&]() { UniformGPUSchemePhaseField->communicate(); });
+
+      auto swapPhaseField = std::function< void(IBlock *) >([&](IBlock * b)
+        {
+           auto phaseField    = b->getData< PhaseField_T >(phase_field);
+           auto phaseFieldTMP = b->getData< PhaseField_T >(phase_field_tmp);
+           phaseField->swapDataPointers(phaseFieldTMP);
+        });
 #endif
 
       BlockDataID const flagFieldID = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field");
@@ -201,99 +233,37 @@ int main(int argc, char** argv)
             init_h(&block);
             init_g(&block);
          }
+         WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
+         WALBERLA_GPU_CHECK(gpuPeekAtLastError())
+         WALBERLA_MPI_BARRIER()
          WALBERLA_LOG_INFO_ON_ROOT("initialization of the distributions done")
       }
 
+      SweepTimeloop timeloop(blocks->getBlockStorage(), timesteps);
 #if defined(WALBERLA_BUILD_WITH_CUDA)
-      int const streamLowPriority  = 0;
-      int const streamHighPriority = 0;
-      auto defaultStream     = gpu::StreamRAII::newPriorityStream(streamLowPriority);
-      auto innerOuterStreams = gpu::ParallelStreams(streamHighPriority);
-#endif
+      timeloop.add() << BeforeFunction(Comm_velocity_based_distributions_start, "Start Hydro PDFs Communication")
+                     << Sweep(phase_field_LB_step.getSweep(defaultStream), "Phase LB Step")
+                     << AfterFunction(Comm_velocity_based_distributions_wait, "Wait Hydro PDFs Communication");
 
-      auto timeLoop = make_shared< SweepTimeloop >(blocks->getBlockStorage(), timesteps);
-#if defined(WALBERLA_BUILD_WITH_CUDA)
-      auto normalTimeStep = [&]() {
-         Comm_velocity_based_distributions->startCommunication();
-         for (auto& block : *blocks)
-            phase_field_LB_step(&block, defaultStream);
-         Comm_velocity_based_distributions->wait();
+      timeloop.add() << BeforeFunction(Comm_phase_field_distributions_start, "Start Phase PDFs Communication")
+                     << Sweep(hydro_LB_step.getSweep(defaultStream), "Hydro LB Step");
+      timeloop.add() << Sweep(swapPhaseField, "Swap PhaseField")
+                     << AfterFunction(Comm_phase_field_distributions_wait, "Wait Phase PDFs Communication");
+
+      timeloop.addFuncAfterTimeStep(Comm_phase_field, "Communication Phase field");
 
-         Comm_phase_field_distributions->startCommunication();
-         for (auto& block : *blocks)
-            hydro_LB_step(&block, defaultStream);
-         Comm_phase_field_distributions->wait();
-      };
-      auto phase_only = [&]() {
-         for (auto& block : *blocks)
-            phase_field_LB_step(&block);
-      };
-      auto hydro_only = [&]() {
-         for (auto& block : *blocks)
-            hydro_LB_step(&block);
-      };
-      auto without_comm = [&]() {
-         for (auto& block : *blocks)
-            phase_field_LB_step(&block);
-         for (auto& block : *blocks)
-            hydro_LB_step(&block);
-      };
 #else
-      auto normalTimeStep = [&]() {
-            Comm_velocity_based_distributions.startCommunication();
-            for (auto& block : *blocks)
-               phase_field_LB_step(&block);
-            Comm_velocity_based_distributions.wait();
-
-            Comm_phase_field_distributions.startCommunication();
-            for (auto& block : *blocks)
-               hydro_LB_step(&block);
-            Comm_phase_field_distributions.wait();
-      };
-      auto phase_only = [&]() {
-         for (auto& block : *blocks)
-            phase_field_LB_step(&block);
-      };
-      auto hydro_only = [&]() {
-         for (auto& block : *blocks)
-            hydro_LB_step(&block);
-      };
-      auto without_comm = [&]() {
-         for (auto& block : *blocks)
-            phase_field_LB_step(&block);
-         for (auto& block : *blocks)
-            hydro_LB_step(&block);
-      };
-#endif
-      std::function< void() > timeStep;
-      if (timeStepStrategy == "phase_only")
-      {
-         timeStep = std::function< void() >(phase_only);
-         WALBERLA_LOG_INFO_ON_ROOT("started only phasefield step without communication for benchmarking")
-      }
-      else if (timeStepStrategy == "hydro_only")
-      {
-         timeStep = std::function< void() >(hydro_only);
-         WALBERLA_LOG_INFO_ON_ROOT("started only hydro step without communication for benchmarking")
-      }
-      else if (timeStepStrategy == "kernel_only")
-      {
-         timeStep = std::function< void() >(without_comm);
-         WALBERLA_LOG_INFO_ON_ROOT("started complete phasefield model without communication for benchmarking")
-      }
-      else
-      {
-         timeStep = std::function< void() >(normalTimeStep);
-         WALBERLA_LOG_INFO_ON_ROOT("normal timestep with overlapping")
-      }
+      timeloop.add() << BeforeFunction(Comm_velocity_based_distributions_start, "Start Hydro PDFs Communication")
+                     << Sweep(phase_field_LB_step.getSweep(), "Phase LB Step")
+                     << AfterFunction(Comm_velocity_based_distributions_wait, "Wait Hydro PDFs Communication");
 
-      timeLoop->add() << BeforeFunction(timeStep) << Sweep([](IBlock*) {}, "time step");
+      timeloop.add() << BeforeFunction(Comm_phase_field_distributions_start, "Start Phase PDFs Communication")
+                     << Sweep(hydro_LB_step.getSweep(), "Hydro LB Step");
+      timeloop.add() << Sweep(swapPhaseField, "Swap PhaseField")
+                     << AfterFunction(Comm_phase_field_distributions_wait, "Wait Phase PDFs Communication");
 
-      // remaining time logger
-      if (remainingTimeLoggerFrequency > 0)
-         timeLoop->addFuncAfterTimeStep(
-            timing::RemainingTimeLogger(timeLoop->getNrOfTimeSteps(), remainingTimeLoggerFrequency),
-            "remaining time logger");
+      timeloop.addFuncAfterTimeStep(Comm_phase_field, "Communication Phase field");
+#endif
 
       uint_t const vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0);
       if (vtkWriteFrequency > 1)
@@ -307,40 +277,60 @@ int main(int argc, char** argv)
          auto phaseWriter = make_shared< field::VTKWriter< PhaseField_T > >(phase_field, "phase");
          vtkOutput->addCellDataWriter(phaseWriter);
 
-         timeLoop->addFuncBeforeTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
+         timeloop.addFuncBeforeTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
       }
 
+      lbm_generated::PerformanceEvaluation< FlagField_T > const performance(blocks, flagFieldID, fluidFlagUID);
+      field::CellCounter< FlagField_T > fluidCells(blocks, flagFieldID, fluidFlagUID);
+      fluidCells();
+
+      WALBERLA_LOG_INFO_ON_ROOT("Multiphase benchmark with " << fluidCells.numberOfCells() << " fluid cells")
+      WALBERLA_LOG_INFO_ON_ROOT("Running " << warmupSteps << " timesteps to warm up the system")
+
       for (uint_t i = 0; i < warmupSteps; ++i)
-         timeLoop->singleStep();
+         timeloop.singleStep();
+
+      WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
+      WALBERLA_GPU_CHECK(gpuPeekAtLastError())
+      WALBERLA_MPI_BARRIER()
+      WALBERLA_LOG_INFO_ON_ROOT("Warmup timesteps done")
 
-      timeLoop->setCurrentTimeStepToZero();
+      timeloop.setCurrentTimeStepToZero();
+      WALBERLA_MPI_BARRIER()
       WALBERLA_LOG_INFO_ON_ROOT("Starting simulation with " << timesteps << " time steps")
+      WcTimingPool timeloopTiming;
       WcTimer simTimer;
 #if defined(WALBERLA_BUILD_WITH_CUDA)
-      cudaDeviceSynchronize();
+      WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
 #endif
       simTimer.start();
-      timeLoop->run();
+      timeloop.run(timeloopTiming);
 #if defined(WALBERLA_BUILD_WITH_CUDA)
-      cudaDeviceSynchronize();
+      WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
+      WALBERLA_GPU_CHECK(gpuPeekAtLastError())
 #endif
+      WALBERLA_MPI_BARRIER()
       simTimer.end();
       WALBERLA_LOG_INFO_ON_ROOT("Simulation finished")
-      auto time            = real_c(simTimer.last());
-      auto nrOfCells       = real_c(cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2]);
-      auto mlupsPerProcess = nrOfCells * real_c(timesteps) / time * 1e-6;
-      WALBERLA_LOG_RESULT_ON_ROOT("MLUPS per process: " << mlupsPerProcess)
+      double time = simTimer.max();
+      WALBERLA_MPI_SECTION() { walberla::mpi::reduceInplace(time, walberla::mpi::MAX); }
+      performance.logResultOnRoot(timesteps, time);
+
+      const auto reducedTimeloopTiming = timeloopTiming.getReduced();
+      WALBERLA_LOG_RESULT_ON_ROOT("Time loop timing:\n" << *reducedTimeloopTiming)
+
+      WALBERLA_LOG_RESULT_ON_ROOT("MLUPS per process: " << performance.mlupsPerProcess(timesteps, time))
       WALBERLA_LOG_RESULT_ON_ROOT("Time per time step: " << time / real_c(timesteps) << " s")
       WALBERLA_ROOT_SECTION()
       {
          python_coupling::PythonCallback pythonCallbackResults("results_callback");
          if (pythonCallbackResults.isCallable())
          {
-            pythonCallbackResults.data().exposeValue("mlupsPerProcess", mlupsPerProcess);
+            pythonCallbackResults.data().exposeValue("mlupsPerProcess", performance.mlupsPerProcess(timesteps, time));
             pythonCallbackResults.data().exposeValue("stencil_phase", StencilNamePhase);
             pythonCallbackResults.data().exposeValue("stencil_hydro", StencilNameHydro);
             #if defined(WALBERLA_BUILD_WITH_CUDA)
-               pythonCallbackResults.data().exposeValue("cuda_enabled_mpi", cudaEnabledMpi);
+               pythonCallbackResults.data().exposeValue("cuda_enabled_mpi", gpuEnabledMpi);
             #endif
             // Call Python function to report results
             pythonCallbackResults();
diff --git a/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py b/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py
index d0e676f573c33880f7fe9e20f60700e51d36f28a..30fe929790356155b4acba0a2e8e51924905362f 100644
--- a/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py
+++ b/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py
@@ -19,7 +19,7 @@ with CodeGeneration() as ctx:
     field_type = "float64" if ctx.double_accuracy else "float32"
 
     stencil_phase = LBStencil(Stencil.D3Q15)
-    stencil_hydro = LBStencil(Stencil.D3Q27)
+    stencil_hydro = LBStencil(Stencil.D3Q19)
     assert (stencil_phase.D == stencil_hydro.D)
 
     ########################
@@ -76,13 +76,13 @@ with CodeGeneration() as ctx:
                                  delta_equilibrium=False,
                                  force=sp.symbols(f"F_:{stencil_phase.D}"), velocity_input=u,
                                  weighted=True, relaxation_rates=rates,
-                                 output={'density': C_tmp}, kernel_type='stream_pull_collide')
+                                 output={'density': C_tmp})
     method_phase = create_lb_method(lbm_config=lbm_config_phase)
 
     lbm_config_hydro = LBMConfig(stencil=stencil_hydro, method=Method.MRT, compressible=False,
                                  weighted=True, relaxation_rate=omega,
                                  force=sp.symbols(f"F_:{stencil_hydro.D}"),
-                                 output={'velocity': u}, kernel_type='collide_stream_push')
+                                 output={'velocity': u})
     method_hydro = create_lb_method(lbm_config=lbm_config_hydro)
 
     # create the kernels for the initialization of the g and h field
@@ -137,7 +137,8 @@ with CodeGeneration() as ctx:
     sweep_params = {'block_size': sweep_block_size}
 
     stencil_typedefs = {'Stencil_phase_T': stencil_phase,
-                        'Stencil_hydro_T': stencil_hydro}
+                        'Stencil_hydro_T': stencil_hydro,
+                        'Full_Stencil_T': LBStencil(Stencil.D3Q27)}
     field_typedefs = {'PdfField_phase_T': h,
                       'PdfField_hydro_T': g,
                       'VelocityField_T': u,
@@ -156,7 +157,7 @@ with CodeGeneration() as ctx:
         generate_sweep(ctx, 'initialize_velocity_based_distributions', g_updates, target=Target.CPU)
 
         generate_sweep(ctx, 'phase_field_LB_step', phase_field_LB_step,
-                       field_swaps=[(h, h_tmp), (C, C_tmp)],
+                       field_swaps=[(h, h_tmp)],
                        inner_outer_split=True,
                        cpu_vectorize_info=cpu_vec,
                        target=Target.CPU)
@@ -172,7 +173,7 @@ with CodeGeneration() as ctx:
                               streaming_pattern='pull', target=Target.CPU)
 
         generate_lb_pack_info(ctx, 'PackInfo_velocity_based_distributions', stencil_hydro, g,
-                              streaming_pattern='push', target=Target.CPU)
+                              streaming_pattern='pull', target=Target.CPU)
 
         generate_pack_info_for_field(ctx, 'PackInfo_phase_field', C, target=Target.CPU)
 
@@ -183,7 +184,7 @@ with CodeGeneration() as ctx:
                        g_updates, target=Target.GPU)
 
         generate_sweep(ctx, 'phase_field_LB_step', phase_field_LB_step,
-                       field_swaps=[(h, h_tmp), (C, C_tmp)],
+                       field_swaps=[(h, h_tmp)],
                        target=Target.GPU,
                        gpu_indexing_params=sweep_params,
                        varying_parameters=vp)
@@ -198,7 +199,7 @@ with CodeGeneration() as ctx:
                               streaming_pattern='pull', target=Target.GPU)
 
         generate_lb_pack_info(ctx, 'PackInfo_velocity_based_distributions', stencil_hydro, g,
-                              streaming_pattern='push', target=Target.GPU)
+                              streaming_pattern='pull', target=Target.GPU)
 
         generate_pack_info_for_field(ctx, 'PackInfo_phase_field', C, target=Target.GPU)
 
diff --git a/apps/benchmarks/UniformGridCPU/CMakeLists.txt b/apps/benchmarks/UniformGridCPU/CMakeLists.txt
index 8e9c1e7af92fcde2102b9e279abed9e451c28858..76c40ea570e888dd5233e4fb17aa9a1540c1fb22 100644
--- a/apps/benchmarks/UniformGridCPU/CMakeLists.txt
+++ b/apps/benchmarks/UniformGridCPU/CMakeLists.txt
@@ -3,14 +3,16 @@ waLBerla_link_files_to_builddir( "*.py" )
 waLBerla_link_files_to_builddir( "simulation_setup" )
 
 
-foreach(streaming_pattern pull push aa esotwist)
+foreach(streaming_pattern pull push aa esotwist esopull esopush)
     foreach(stencil d3q19 d3q27)
-        foreach (collision_setup srt trt w-mrt r-w-mrt cm r-cm k r-k entropic smagorinsky)
+        foreach (collision_setup srt trt mrt mrt-overrelax central central-overrelax cumulant cumulant-overrelax cumulant-K17 entropic smagorinsky qr)
 	    # KBC methods only for D2Q9 and D3Q27 defined
 	    if (${collision_setup} STREQUAL "entropic" AND ${stencil} STREQUAL "d3q19")
 		    continue()
-	    endif (${collision_setup} STREQUAL "entropic" AND ${stencil} STREQUAL "d3q19")
-
+        endif (${collision_setup} STREQUAL "entropic" AND ${stencil} STREQUAL "d3q19")
+        if (${collision_setup} STREQUAL "cumulant-K17" AND ${stencil} STREQUAL "d3q19")
+            continue()
+        endif (${collision_setup} STREQUAL "cumulant-K17" AND ${stencil} STREQUAL "d3q19")
             set(config ${stencil}_${streaming_pattern}_${collision_setup})
             waLBerla_generate_target_from_python(NAME UniformGridCPUGenerated_${config}
                     FILE UniformGridCPU.py
diff --git a/apps/benchmarks/UniformGridCPU/UniformGridCPU.cpp b/apps/benchmarks/UniformGridCPU/UniformGridCPU.cpp
index 4674cfae92aaaf652b8e83e4e1dcae9c87427c1a..a7eb9ecf4fbe63f4a284be147ac0e09e777830d3 100644
--- a/apps/benchmarks/UniformGridCPU/UniformGridCPU.cpp
+++ b/apps/benchmarks/UniformGridCPU/UniformGridCPU.cpp
@@ -64,6 +64,9 @@ using SweepCollection_T = lbm::UniformGridCPUSweepCollection;
 
 using blockforest::communication::UniformBufferedScheme;
 
+using macroFieldType = VelocityField_T::value_type;
+using pdfFieldType = PdfField_T::value_type;
+
 int main(int argc, char** argv)
 {
    const mpi::Environment env(argc, argv);
@@ -87,10 +90,10 @@ int main(int argc, char** argv)
 
       // Creating fields
       const StorageSpecification_T StorageSpec = StorageSpecification_T();
-      auto fieldAllocator = make_shared< field::AllocateAligned< real_t, 64 > >();
+      auto fieldAllocator = make_shared< field::AllocateAligned< pdfFieldType, 64 > >();
       const BlockDataID pdfFieldId  = lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, field::fzyx, fieldAllocator);
-      const BlockDataID velFieldId = field::addToStorage< VelocityField_T >(blocks, "vel", real_c(0.0), field::fzyx);
-      const BlockDataID densityFieldId = field::addToStorage< ScalarField_T >(blocks, "density", real_c(1.0), field::fzyx);
+      const BlockDataID velFieldId = field::addToStorage< VelocityField_T >(blocks, "vel", macroFieldType(0.0), field::fzyx);
+      const BlockDataID densityFieldId = field::addToStorage< ScalarField_T >(blocks, "density", macroFieldType(1.0), field::fzyx);
       const BlockDataID flagFieldID = field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field");
 
       // Initialize velocity on cpu
@@ -235,12 +238,16 @@ int main(int argc, char** argv)
                   pythonCallbackResults.data().exposeValue("numProcesses", performance.processes());
                   pythonCallbackResults.data().exposeValue("numThreads", performance.threads());
                   pythonCallbackResults.data().exposeValue("numCores", performance.cores());
+                  pythonCallbackResults.data().exposeValue("numberOfCells", performance.numberOfCells());
+                  pythonCallbackResults.data().exposeValue("numberOfFluidCells", performance.numberOfFluidCells());
                   pythonCallbackResults.data().exposeValue("mlups", performance.mlups(timesteps, time));
                   pythonCallbackResults.data().exposeValue("mlupsPerCore", performance.mlupsPerCore(timesteps, time));
                   pythonCallbackResults.data().exposeValue("mlupsPerProcess", performance.mlupsPerProcess(timesteps, time));
                   pythonCallbackResults.data().exposeValue("stencil", infoStencil);
                   pythonCallbackResults.data().exposeValue("streamingPattern", infoStreamingPattern);
                   pythonCallbackResults.data().exposeValue("collisionSetup", infoCollisionSetup);
+                  pythonCallbackResults.data().exposeValue("vectorised", vectorised);
+                  pythonCallbackResults.data().exposeValue("nontemporal", nontemporal);
                   pythonCallbackResults.data().exposeValue("cse_global", infoCseGlobal);
                   pythonCallbackResults.data().exposeValue("cse_pdfs", infoCsePdfs);
                   // Call Python function to report results
diff --git a/apps/benchmarks/UniformGridCPU/UniformGridCPU.py b/apps/benchmarks/UniformGridCPU/UniformGridCPU.py
index 5a600eade461c5698c35487ce6aca8a78207aa63..723b28f1ac500d881d1659f7e43d1c689eea64a2 100644
--- a/apps/benchmarks/UniformGridCPU/UniformGridCPU.py
+++ b/apps/benchmarks/UniformGridCPU/UniformGridCPU.py
@@ -3,17 +3,12 @@ from dataclasses import replace
 import sympy as sp
 import pystencils as ps
 
-from pystencils.simp.subexpression_insertion import insert_zeros, insert_aliases, insert_constants,\
-    insert_symbol_times_minus_one
-
 from lbmpy.advanced_streaming import is_inplace
-from lbmpy.advanced_streaming.utility import streaming_patterns, get_accessor, Timestep
+from lbmpy.advanced_streaming.utility import streaming_patterns
 from lbmpy.boundaries import NoSlip, UBB
 from lbmpy.creationfunctions import LBMConfig, LBMOptimisation, LBStencil, create_lb_collision_rule
 from lbmpy.enums import Method, Stencil, SubgridScaleModel
-from lbmpy.fieldaccess import CollideOnlyInplaceAccessor
 from lbmpy.moments import get_default_moment_set_for_stencil
-from lbmpy.updatekernels import create_stream_only_kernel
 
 from pystencils_walberla import CodeGeneration, generate_info_header, generate_sweep
 from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator
@@ -21,48 +16,53 @@ from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator
 omega = sp.symbols('omega')
 omega_free = sp.Symbol('omega_free')
 
-# best configs in terms of FLOPS
 options_dict = {
     'srt': {
         'method': Method.SRT,
         'relaxation_rate': omega,
-        'compressible': True,
+        'compressible': False,
     },
     'trt': {
         'method': Method.TRT,
         'relaxation_rate': omega,
-        'compressible': True,
+        'compressible': False,
     },
-    'r-w-mrt': {
+    'mrt': {
         'method': Method.MRT,
         'relaxation_rates': [omega, 1, 1, 1, 1, 1, 1],
-        'compressible': True,
+        'compressible': False,
     },
-    'w-mrt': {
+    'mrt-overrelax': {
         'method': Method.MRT,
         'relaxation_rates': [omega] + [1 + x * 1e-2 for x in range(1, 11)],
-        'compressible': True,
+        'compressible': False,
     },
-    'r-cm': {
+    'central': {
         'method': Method.CENTRAL_MOMENT,
         'relaxation_rate': omega,
         'compressible': True,
     },
-    'cm': {
+    'central-overrelax': {
         'method': Method.CENTRAL_MOMENT,
         'relaxation_rates': [omega] + [1 + x * 1e-2 for x in range(1, 11)],
         'compressible': True,
     },
-    'r-k': {
-        'method': Method.CUMULANT,
+    'cumulant': {
+        'method': Method.MONOMIAL_CUMULANT,
         'relaxation_rate': omega,
         'compressible': True,
     },
-    'k': {
-        'method': Method.CUMULANT,
+    'cumulant-overrelax': {
+        'method': Method.MONOMIAL_CUMULANT,
         'relaxation_rates': [omega] + [1 + x * 1e-2 for x in range(1, 18)],
         'compressible': True,
     },
+    'cumulant-K17': {
+        'method': Method.CUMULANT,
+        'relaxation_rate': omega,
+        'compressible': True,
+        'fourth_order_correction': 0.01
+    },
     'entropic': {
         'method': Method.TRT_KBC_N4,
         'compressible': True,
@@ -75,6 +75,11 @@ options_dict = {
         'method': Method.SRT,
         'subgrid_scale_model': SubgridScaleModel.SMAGORINSKY,
         'relaxation_rate': omega,
+    },
+    'qr': {
+        'method': Method.SRT,
+        'subgrid_scale_model': SubgridScaleModel.QR,
+        'relaxation_rate': omega,
     }
 }
 
@@ -83,6 +88,8 @@ info_header = """
 const char * infoStencil = "{stencil}";
 const char * infoStreamingPattern = "{streaming_pattern}";
 const char * infoCollisionSetup = "{collision_setup}";
+const bool vectorised = {vec};
+const bool nontemporal = {nt_stores};
 const bool infoCseGlobal = {cse_global};
 const bool infoCsePdfs = {cse_pdfs};
 """
@@ -90,10 +97,15 @@ const bool infoCsePdfs = {cse_pdfs};
 with CodeGeneration() as ctx:
     openmp = True if ctx.openmp else False
     field_type = "float64" if ctx.double_accuracy else "float32"
-    if ctx.optimize_for_localhost:
-        cpu_vec = {"nontemporal": False, "assume_aligned": True}
-    else:
-        cpu_vec = None
+    # This base pointer specification introduces temporary pointers in the outer loop such that the inner loop
+    # only contains aligned memory addresses. This makes NT stores much more effective, which yields large performance
+    # gains, especially for the pull scheme on Skylake architectures
+    base_pointer_spec = None  # [['spatialInner0'], ['spatialInner1']]
+    # cpu_vec = {"instruction_set": "best", "nontemporal": False,
+    #            "assume_aligned": True, 'assume_sufficient_line_padding': True}
+
+    cpu_vec = {"instruction_set": None}
+    nt_stores = False
 
     config_tokens = ctx.config.split('_')
 
@@ -110,7 +122,6 @@ with CodeGeneration() as ctx:
         raise ValueError("Only D3Q27 and D3Q19 stencil are supported at the moment")
 
     assert streaming_pattern in streaming_patterns, f"Invalid streaming pattern: {streaming_pattern}"
-
     options = options_dict[collision_setup]
 
     assert stencil.D == 3, "This application supports only three-dimensional stencils"
@@ -121,23 +132,20 @@ with CodeGeneration() as ctx:
     lbm_config = LBMConfig(stencil=stencil, field_name=pdfs.name, streaming_pattern=streaming_pattern, **options)
     lbm_opt = LBMOptimisation(cse_global=True, cse_pdfs=False, symbolic_field=pdfs, field_layout='fzyx')
 
+    # This creates a simplified version of the central moment collision operator where the bulk and shear viscosity
+    # are not separated. This is done to get a fair comparison with the monomial cumulants.
     if lbm_config.method == Method.CENTRAL_MOMENT:
         lbm_config = replace(lbm_config, nested_moments=get_default_moment_set_for_stencil(stencil))
 
     if not is_inplace(streaming_pattern):
         lbm_opt = replace(lbm_opt, symbolic_temporary_field=pdfs_tmp)
-        field_swaps = [(pdfs, pdfs_tmp)]
-    else:
-        field_swaps = []
 
-    # Sweep for Stream only. This is for benchmarking an empty streaming pattern without LBM.
-    # is_inplace is set to False to ensure that the streaming is done with src and dst field.
-    # If this is not the case the compiler might simplify the streaming in a way that benchmarking makes no sense.
-    # accessor = CollideOnlyInplaceAccessor()
-    accessor = get_accessor(streaming_pattern, Timestep.EVEN)
-    #accessor.is_inplace = False
-    field_swaps_stream_only = () if accessor.is_inplace else [(pdfs, pdfs_tmp)]
-    stream_only_kernel = create_stream_only_kernel(stencil, pdfs, None if accessor.is_inplace else pdfs_tmp, accessor=accessor)
+    # This is a microbenchmark for testing how fast Q PDFs can be updated per cell. To avoid optimisations from
+    # the compiler the PDFs are shuffled inside a cell. Otherwise, for common streaming patterns compilers would
+    # typically remove the copy of the center PDF which results in an overestimation of the maximum performance
+    stream_only_kernel = []
+    for i in range(stencil.Q):
+        stream_only_kernel.append(ps.Assignment(pdfs(i), pdfs((i + 3) % stencil.Q)))
 
     # LB Sweep
     collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
@@ -153,17 +161,25 @@ with CodeGeneration() as ctx:
                          nonuniform=False, boundaries=[no_slip, ubb],
                          macroscopic_fields=macroscopic_fields,
                          cpu_openmp=openmp, cpu_vectorize_info=cpu_vec,
-                         set_pre_collision_pdfs=False)
+                         base_pointer_specification=base_pointer_spec)
 
     # Stream only kernel
+    cpu_vec_stream = None
+    if ctx.optimize_for_localhost:
+        cpu_vec_stream = {"instruction_set": "best", "nontemporal": True,
+                          "assume_aligned": True, 'assume_sufficient_line_padding': True,
+                          "assume_inner_stride_one": True}
+
     generate_sweep(ctx, 'UniformGridCPU_StreamOnlyKernel', stream_only_kernel,
-                   field_swaps=field_swaps_stream_only,
-                   target=ps.Target.CPU, cpu_openmp=openmp)
+                   target=ps.Target.CPU, cpu_openmp=openmp,
+                   cpu_vectorize_info=cpu_vec_stream, base_pointer_specification=[['spatialInner0'], ['spatialInner1']])
 
     infoHeaderParams = {
         'stencil': stencil_str,
         'streaming_pattern': streaming_pattern,
         'collision_setup': collision_setup,
+        'vec': int(bool(cpu_vec) and cpu_vec.get("instruction_set", "best") is not None),  # None disables SIMD
+        'nt_stores': int(nt_stores),
         'cse_global': int(lbm_opt.cse_global),
         'cse_pdfs': int(lbm_opt.cse_pdfs),
     }
diff --git a/apps/benchmarks/UniformGridCPU/simulation_setup/benchmark_configs.py b/apps/benchmarks/UniformGridCPU/simulation_setup/benchmark_configs.py
index 21235056434f3daeebdbac212ec8de60d58810b4..3cfa7a915a40666747d2575bcb95136362124ddc 100644
--- a/apps/benchmarks/UniformGridCPU/simulation_setup/benchmark_configs.py
+++ b/apps/benchmarks/UniformGridCPU/simulation_setup/benchmark_configs.py
@@ -4,13 +4,26 @@ from waLBerla.tools.config import block_decomposition
 from waLBerla.tools.sqlitedb import sequenceValuesToScalars, checkAndUpdateSchema, storeSingle
 import sys
 import sqlite3
-from math import prod
+
+try:
+    import machinestate as ms
+except ImportError:
+    ms = None
 
 # Number of time steps run for a workload of 128^3 per process
 # if double as many cells are on the process, half as many time steps are run etc.
 # increase this to get more reliable measurements
 TIME_STEPS_FOR_128_BLOCK = 10
 DB_FILE = os.environ.get('DB_FILE', "cpu_benchmark.sqlite3")
+BENCHMARK = int(os.environ.get('BENCHMARK', 0))
+
+WeakX = int(os.environ.get('WeakX', 128))
+WeakY = int(os.environ.get('WeakY', 128))
+WeakZ = int(os.environ.get('WeakZ', 128))
+
+StrongX = int(os.environ.get('StrongX', 128))
+StrongY = int(os.environ.get('StrongY', 128))
+StrongZ = int(os.environ.get('StrongZ', 128))
 
 
 def num_time_steps(block_size, time_steps_for_128_block=TIME_STEPS_FOR_128_BLOCK):
@@ -35,7 +48,7 @@ class Scenario:
     def __init__(self, cells_per_block=(128, 128, 128), periodic=(1, 1, 1), blocks_per_process=1,
                  timesteps=None, time_step_strategy="normal", omega=1.8, inner_outer_split=(1, 1, 1),
                  warmup_steps=2, outer_iterations=3, init_shear_flow=False, boundary_setup=False,
-                 vtk_write_frequency=0, remaining_time_logger_frequency=-1):
+                 vtk_write_frequency=0, remaining_time_logger_frequency=-1, db_file_name=None):
 
         if boundary_setup:
             init_shear_flow = False
@@ -58,6 +71,7 @@ class Scenario:
 
         self.vtk_write_frequency = vtk_write_frequency
         self.remaining_time_logger_frequency = remaining_time_logger_frequency
+        self.db_file_name = DB_FILE if db_file_name is None else db_file_name
 
         self.config_dict = self.config(print_dict=False)
 
@@ -101,6 +115,15 @@ class Scenario:
         data['compile_flags'] = wlb.build_info.compiler_flags
         data['walberla_version'] = wlb.build_info.version
         data['build_machine'] = wlb.build_info.build_machine
+
+        if ms:
+            state = ms.MachineState(extended=False, anonymous=True)
+            state.generate()                        # generate subclasses
+            state.update()                          # read information
+            data["MachineState"] = str(state.get())
+        else:
+            print("MachineState module is not available. MachineState was not saved")
+
         sequenceValuesToScalars(data)
 
         result = data
@@ -111,8 +134,8 @@ class Scenario:
         table_name = table_name.replace("-", "_")
         for num_try in range(num_tries):
             try:
-                checkAndUpdateSchema(result, table_name, DB_FILE)
-                storeSingle(result, table_name, DB_FILE)
+                checkAndUpdateSchema(result, table_name, self.db_file_name)
+                storeSingle(result, table_name, self.db_file_name)
                 break
             except sqlite3.OperationalError as e:
                 wlb.log_warning(f"Sqlite DB writing failed: try {num_try + 1}/{num_tries}  {str(e)}")
@@ -156,18 +179,38 @@ def overlap_benchmark():
             scenarios.add(scenario)
 
 
-def scaling_benchmark():
-    """Tests different communication overlapping strategies"""
-    wlb.log_info_on_root("Running scaling benchmark")
+def weak_scaling_benchmark():
+    wlb.log_info_on_root("Running weak scaling benchmark with one block per proc")
     wlb.log_info_on_root("")
 
     scenarios = wlb.ScenarioManager()
-    cells_per_block = [(32, 32, 32), (128, 128, 128)]
 
-    for cell_per_block in cells_per_block:
-        scenarios.add(Scenario(time_step_strategy='noOverlap',
+    for t in ["noOverlap", "simpleOverlap"]:
+        scenarios.add(Scenario(time_step_strategy=t,
                                inner_outer_split=(1, 1, 1),
-                               cells_per_block=cell_per_block))
+                               cells_per_block=(WeakX, WeakY, WeakZ),
+                               boundary_setup=True,
+                               outer_iterations=1,
+                               db_file_name="weakScalingUniformGridOneBlock.sqlite3"))
+
+
+def strong_scaling_benchmark():
+    wlb.log_info_on_root("Running strong scaling benchmark with one block per proc")
+    wlb.log_info_on_root("")
+
+    scenarios = wlb.ScenarioManager()
+
+    domain_size = (StrongX, StrongY, StrongZ)
+    blocks = block_decomposition(wlb.mpi.numProcesses())
+    cells_per_block = tuple([d // b for d, b in zip(domain_size, reversed(blocks))])
+
+    for t in ["noOverlap", "simpleOverlap"]:
+        scenarios.add(Scenario(cells_per_block=cells_per_block,
+                               time_step_strategy=t,
+                               outer_iterations=1,
+                               timesteps=10,
+                               boundary_setup=True,
+                               db_file_name="strongScalingUniformGridOneBlock.sqlite3"))
 
 
 def single_node_benchmark():
@@ -176,13 +219,11 @@ def single_node_benchmark():
     wlb.log_info_on_root("")
 
     scenarios = wlb.ScenarioManager()
-    block_sizes = [(i, i, i) for i in (8, 16, 32, 64, 128)]
-    for block_size in block_sizes:
-        scenario = Scenario(cells_per_block=block_size,
-                            time_step_strategy='kernelOnly',
-                            outer_iterations=1,
-                            timesteps=num_time_steps(block_size))
-        scenarios.add(scenario)
+    scenario = Scenario(cells_per_block=(128, 128, 128),
+                        time_step_strategy='kernelOnly',
+                        outer_iterations=1,
+                        timesteps=10)
+    scenarios.add(scenario)
 
 
 def validation_run():
@@ -211,5 +252,15 @@ wlb.log_info_on_root(f"Batch run of benchmark scenarios, saving result to {DB_FI
 # performance of compute kernel (no communication)
 # overlap_benchmark()  # benchmarks different communication overlap options
 # profiling()  # run only two timesteps on a smaller domain for profiling only
-validation_run()
+# validation_run()
 # scaling_benchmark()
+
+
+if BENCHMARK == 0:
+    single_node_benchmark()
+elif BENCHMARK == 1:
+    weak_scaling_benchmark()
+elif BENCHMARK == 2:
+    strong_scaling_benchmark()
+else:
+    validation_run()
diff --git a/apps/benchmarks/UniformGridGPU/CMakeLists.txt b/apps/benchmarks/UniformGridGPU/CMakeLists.txt
index 2607004f3749f366f8155a0dd200202f00e45867..25ca68ed26239d63eded16ac1db639a10ba93c16 100644
--- a/apps/benchmarks/UniformGridGPU/CMakeLists.txt
+++ b/apps/benchmarks/UniformGridGPU/CMakeLists.txt
@@ -3,13 +3,16 @@ waLBerla_link_files_to_builddir( "*.py" )
 waLBerla_link_files_to_builddir( "simulation_setup" )
 
 
-foreach(streaming_pattern pull push aa esotwist)
+foreach(streaming_pattern pull push aa esotwist esopull esopush)
     foreach(stencil d3q19 d3q27)
-        foreach (collision_setup srt trt mrt mrt-overrelax central central-overrelax cumulant cumulant-overrelax entropic smagorinsky)
+        foreach (collision_setup srt trt mrt mrt-overrelax central central-overrelax cumulant cumulant-overrelax cumulant-K17 entropic smagorinsky qr)
 	    # KBC methods only for D2Q9 and D3Q27 defined
 	    if (${collision_setup} STREQUAL "entropic" AND ${stencil} STREQUAL "d3q19")
 		    continue()
-	    endif (${collision_setup} STREQUAL "entropic" AND ${stencil} STREQUAL "d3q19")
+        endif (${collision_setup} STREQUAL "entropic" AND ${stencil} STREQUAL "d3q19")
+        if (${collision_setup} STREQUAL "cumulant-K17" AND ${stencil} STREQUAL "d3q19")
+            continue()
+        endif (${collision_setup} STREQUAL "cumulant-K17" AND ${stencil} STREQUAL "d3q19")
             set(config ${stencil}_${streaming_pattern}_${collision_setup})
             waLBerla_generate_target_from_python(NAME UniformGridGPUGenerated_${config}
                     FILE UniformGridGPU.py
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp b/apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp
index 91b7a02107c8e11e8b760aee1207895e436c5d3a..65e7b903ab1fde1f7f8cfd9e14c485076aa0978b 100644
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp
+++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp
@@ -73,6 +73,8 @@ using SweepCollection_T = lbm::UniformGridGPUSweepCollection;
 
 using gpu::communication::UniformGPUScheme;
 
+using macroFieldType = VelocityField_T::value_type;
+
 int main(int argc, char** argv)
 {
    mpi::Environment const env(argc, argv);
@@ -103,9 +105,9 @@ int main(int argc, char** argv)
       const StorageSpecification_T StorageSpec = StorageSpecification_T();
       const BlockDataID pdfFieldCpuID  = lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, uint_c(1), field::fzyx);
 
-      auto allocator = make_shared< gpu::HostFieldAllocator<real_t> >(); // use pinned memory allocator for faster CPU-GPU memory transfers
-      const BlockDataID velFieldCpuID = field::addToStorage< VelocityField_T >(blocks, "vel", real_c(0.0), field::fzyx, uint_c(1), allocator);
-      const BlockDataID densityFieldCpuID = field::addToStorage< ScalarField_T >(blocks, "density", real_c(1.0), field::fzyx, uint_c(1), allocator);
+      auto allocator = make_shared< gpu::HostFieldAllocator<macroFieldType> >(); // use pinned memory allocator for faster CPU-GPU memory transfers
+      const BlockDataID velFieldCpuID = field::addToStorage< VelocityField_T >(blocks, "vel", macroFieldType(0.0), field::fzyx, uint_c(1), allocator);
+      const BlockDataID densityFieldCpuID = field::addToStorage< ScalarField_T >(blocks, "density", macroFieldType(1.0), field::fzyx, uint_c(1), allocator);
       const BlockDataID flagFieldID = field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field");
 
       // Initialize velocity on cpu
@@ -136,7 +138,7 @@ int main(int argc, char** argv)
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       ///                                      LB SWEEPS AND BOUNDARY HANDLING                                       ///
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      const pystencils::UniformGridGPU_StreamOnlyKernel StreamOnlyKernel(pdfFieldGpuID, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2]);
+      const pystencils::UniformGridGPU_StreamOnlyKernel StreamOnlyKernel(pdfFieldGpuID);
 
       // Boundaries
       const FlagUID fluidFlagUID("Fluid");
@@ -211,7 +213,7 @@ int main(int argc, char** argv)
          vtkOutput->addBeforeFunction([&]() {
             for (auto& block : *blocks)
                sweepCollection.calculateMacroscopicParameters(&block);
-            gpu::fieldCpy< VelocityField_T, gpu::GPUField< real_t > >(blocks, velFieldCpuID, velFieldGpuID);
+            gpu::fieldCpy< VelocityField_T, gpu::GPUField< VelocityField_T::value_type > >(blocks, velFieldCpuID, velFieldGpuID);
          });
          timeLoop.addFuncAfterTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
       }
@@ -264,6 +266,13 @@ int main(int argc, char** argv)
             python_coupling::PythonCallback pythonCallbackResults("results_callback");
             if (pythonCallbackResults.isCallable())
             {
+               pythonCallbackResults.data().exposeValue("numProcesses", performance.processes());
+               pythonCallbackResults.data().exposeValue("numThreads", performance.threads());
+               pythonCallbackResults.data().exposeValue("numCores", performance.cores());
+               pythonCallbackResults.data().exposeValue("numberOfCells", performance.numberOfCells());
+               pythonCallbackResults.data().exposeValue("numberOfFluidCells", performance.numberOfFluidCells());
+               pythonCallbackResults.data().exposeValue("mlups", performance.mlups(timesteps, time));
+               pythonCallbackResults.data().exposeValue("mlupsPerCore", performance.mlupsPerCore(timesteps, time));
                pythonCallbackResults.data().exposeValue("mlupsPerProcess", performance.mlupsPerProcess(timesteps, time));
                pythonCallbackResults.data().exposeValue("stencil", infoStencil);
                pythonCallbackResults.data().exposeValue("streamingPattern", infoStreamingPattern);
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU.py b/apps/benchmarks/UniformGridGPU/UniformGridGPU.py
index 09235c4340a0b0946f72c513a219bca3c28dd724..10c562b39f095c4e8e0328e407e1aaa2270d6e57 100644
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU.py
+++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU.py
@@ -4,18 +4,16 @@ import pystencils as ps
 
 from dataclasses import replace
 
+from pystencils import Assignment
 from pystencils.typing import TypedSymbol
 from pystencils.fast_approximation import insert_fast_sqrts, insert_fast_divisions
 
-from lbmpy import LBMConfig, LBMOptimisation, LBStencil, Method, Stencil
-from lbmpy.enums import SubgridScaleModel
+from lbmpy import LBMConfig, LBMOptimisation, LBStencil, Method, Stencil, SubgridScaleModel
 from lbmpy.advanced_streaming import is_inplace
 from lbmpy.advanced_streaming.utility import streaming_patterns
 from lbmpy.boundaries import NoSlip, UBB
 from lbmpy.creationfunctions import create_lb_collision_rule
 from lbmpy.moments import get_default_moment_set_for_stencil
-from lbmpy.updatekernels import create_stream_only_kernel
-from lbmpy.fieldaccess import *
 
 from pystencils_walberla import CodeGeneration, generate_info_header, generate_sweep
 from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator
@@ -75,6 +73,12 @@ options_dict = {
         'relaxation_rates': [omega] + [1 + x * 1e-2 for x in range(1, 18)],
         'compressible': True,
     },
+    'cumulant-K17': {
+        'method': Method.CUMULANT,
+        'relaxation_rate': omega,
+        'compressible': True,
+        'fourth_order_correction': 0.01
+    },
     'entropic': {
         'method': Method.TRT_KBC_N4,
         'compressible': True,
@@ -87,6 +91,11 @@ options_dict = {
         'method': Method.SRT,
         'subgrid_scale_model': SubgridScaleModel.SMAGORINSKY,
         'relaxation_rate': omega,
+    },
+    'qr': {
+        'method': Method.SRT,
+        'subgrid_scale_model': SubgridScaleModel.QR,
+        'relaxation_rate': omega,
     }
 }
 
@@ -102,7 +111,8 @@ const bool infoCsePdfs = {cse_pdfs};
 optimize = True
 
 with CodeGeneration() as ctx:
-    field_type = "float64" if ctx.double_accuracy else "float32"
+    pdf_data_type = "float64"
+    field_data_type = "float64"
     config_tokens = ctx.config.split('_')
 
     assert len(config_tokens) >= 3
@@ -125,8 +135,8 @@ with CodeGeneration() as ctx:
     options = options_dict[collision_setup]
 
     assert stencil.D == 3, "This application supports only three-dimensional stencils"
-    pdfs, pdfs_tmp = ps.fields(f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {field_type}[3D]", layout='fzyx')
-    density_field, velocity_field = ps.fields(f"density, velocity(3) : {field_type}[3D]", layout='fzyx')
+    pdfs, pdfs_tmp = ps.fields(f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {pdf_data_type}[3D]", layout='fzyx')
+    density_field, velocity_field = ps.fields(f"density, velocity(3) : {field_data_type}[3D]", layout='fzyx')
     macroscopic_fields = {'density': density_field, 'velocity': velocity_field}
 
     lbm_config = LBMConfig(stencil=stencil, field_name=pdfs.name, streaming_pattern=streaming_pattern, **options)
@@ -141,13 +151,12 @@ with CodeGeneration() as ctx:
     else:
         field_swaps = []
 
-    # Sweep for Stream only. This is for benchmarking an empty streaming pattern without LBM.
-    # is_inplace is set to False to ensure that the streaming is done with src and dst field.
-    # If this is not the case the compiler might simplify the streaming in a way that benchmarking makes no sense.
-    accessor = CollideOnlyInplaceAccessor()
-    accessor.is_inplace = False
-    field_swaps_stream_only = [(pdfs, pdfs_tmp)]
-    stream_only_kernel = create_stream_only_kernel(stencil, pdfs, pdfs_tmp, accessor=accessor)
+    # This is a microbenchmark for testing how fast Q PDFs can be updated per cell. To avoid optimisations by
+    # the compiler, the PDFs are shuffled inside a cell. Otherwise, for common streaming patterns, compilers would
+    # typically remove the copy of the center PDF, which results in an overestimation of the maximum performance.
+    stream_only_kernel = []
+    for i in range(stencil.Q):
+        stream_only_kernel.append(Assignment(pdfs(i), pdfs((i + 3) % stencil.Q)))
 
     # LB Sweep
     collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
@@ -159,9 +168,10 @@ with CodeGeneration() as ctx:
     lb_method = collision_rule.method
 
     no_slip = lbm_boundary_generator(class_name='NoSlip', flag_uid='NoSlip',
-                                     boundary_object=NoSlip())
+                                     boundary_object=NoSlip(), field_data_type=pdf_data_type)
     ubb = lbm_boundary_generator(class_name='UBB', flag_uid='UBB',
-                                 boundary_object=UBB([0.05, 0, 0], data_type=field_type))
+                                 boundary_object=UBB([0.05, 0, 0], data_type=field_data_type),
+                                 field_data_type=pdf_data_type)
 
     generate_lbm_package(ctx, name="UniformGridGPU",
                          collision_rule=collision_rule,
@@ -169,12 +179,12 @@ with CodeGeneration() as ctx:
                          nonuniform=False, boundaries=[no_slip, ubb],
                          macroscopic_fields=macroscopic_fields,
                          target=ps.Target.GPU, gpu_indexing_params=gpu_indexing_params,
-                         max_threads=max_threads, set_pre_collision_pdfs=False)
+                         data_type=field_data_type, pdfs_data_type=pdf_data_type,
+                         max_threads=max_threads)
 
     # Stream only kernel
-    vp = [('int32_t', 'cudaBlockSize0'), ('int32_t', 'cudaBlockSize1'), ('int32_t', 'cudaBlockSize2')]
-    generate_sweep(ctx, 'UniformGridGPU_StreamOnlyKernel', stream_only_kernel, field_swaps=field_swaps_stream_only,
-                   gpu_indexing_params=gpu_indexing_params, varying_parameters=vp, target=ps.Target.GPU,
+    generate_sweep(ctx, 'UniformGridGPU_StreamOnlyKernel', stream_only_kernel,
+                   gpu_indexing_params={'block_size': (128, 1, 1)}, target=ps.Target.GPU,
                    max_threads=max_threads)
 
     infoHeaderParams = {
diff --git a/apps/benchmarks/UniformGridCPU/simulation_setup/PizDaintJobScript.py b/apps/benchmarks/UniformGridGPU/old_ideas/PizDaintJobScript.py
similarity index 100%
rename from apps/benchmarks/UniformGridCPU/simulation_setup/PizDaintJobScript.py
rename to apps/benchmarks/UniformGridGPU/old_ideas/PizDaintJobScript.py
diff --git a/apps/benchmarks/UniformGridGPU/simulation_setup/benchmark_configs.py b/apps/benchmarks/UniformGridGPU/simulation_setup/benchmark_configs.py
index 74be4378e0e2acef0bcb3c36f0f6d64916bba6c8..346a596eff4ba057c20f50efe58566ff5e1d0514 100755
--- a/apps/benchmarks/UniformGridGPU/simulation_setup/benchmark_configs.py
+++ b/apps/benchmarks/UniformGridGPU/simulation_setup/benchmark_configs.py
@@ -6,11 +6,25 @@ import sys
 import sqlite3
 from math import prod
 
+try:
+    import machinestate as ms
+except ImportError:
+    ms = None
+
 # Number of time steps run for a workload of 128^3 per GPU
 # if double as many cells are on the GPU, half as many time steps are run etc.
 # increase this to get more reliable measurements
 TIME_STEPS_FOR_128_BLOCK = 1000
 DB_FILE = os.environ.get('DB_FILE', "gpu_benchmark.sqlite3")
+BENCHMARK = int(os.environ.get('BENCHMARK', 0))
+
+WeakX = int(os.environ.get('WeakX', 128))
+WeakY = int(os.environ.get('WeakY', 128))
+WeakZ = int(os.environ.get('WeakZ', 128))
+
+StrongX = int(os.environ.get('StrongX', 128))
+StrongY = int(os.environ.get('StrongY', 128))
+StrongZ = int(os.environ.get('StrongZ', 128))
 
 BASE_CONFIG = {
     'DomainSetup': {
@@ -39,6 +53,8 @@ ldc_setup = {'Border': [
 def num_time_steps(block_size, time_steps_for_128_block=200):
     cells = block_size[0] * block_size[1] * block_size[2]
     time_steps = (128 ** 3 / cells) * time_steps_for_128_block
+    if time_steps < 10:  # lower bound: never return fewer than 10 time steps
+        time_steps = 10
     return int(time_steps)
 
 
@@ -61,13 +77,13 @@ class Scenario:
                  inner_outer_split=(1, 1, 1), warmup_steps=5, outer_iterations=3,
                  init_shear_flow=False, boundary_setup=False,
                  vtk_write_frequency=0, remaining_time_logger_frequency=-1,
-                 additional_info=None):
+                 additional_info=None, blocks=None, db_file_name=None):
 
         if boundary_setup:
             init_shear_flow = False
             periodic = (0, 0, 0)
 
-        self.blocks = block_decomposition(wlb.mpi.numProcesses())
+        self.blocks = blocks if blocks else block_decomposition(wlb.mpi.numProcesses())
 
         self.cells_per_block = cells_per_block
         self.periodic = periodic
@@ -85,6 +101,7 @@ class Scenario:
 
         self.vtk_write_frequency = vtk_write_frequency
         self.remaining_time_logger_frequency = remaining_time_logger_frequency
+        self.db_file_name = DB_FILE if db_file_name is None else db_file_name
 
         self.config_dict = self.config(print_dict=False)
         self.additional_info = additional_info
@@ -97,7 +114,6 @@ class Scenario:
                 'blocks': self.blocks,
                 'cellsPerBlock': self.cells_per_block,
                 'periodic': self.periodic,
-                'oneBlockPerProcess': True
             },
             'Parameters': {
                 'omega': self.omega,
@@ -115,7 +131,6 @@ class Scenario:
             'Logging': {
                 'logLevel': 'info',  # info progress detail tracing
             }
-
         }
         if self.boundary_setup:
             config_dict["Boundaries"] = ldc_setup
@@ -140,6 +155,15 @@ class Scenario:
         data['compile_flags'] = wlb.build_info.compiler_flags
         data['walberla_version'] = wlb.build_info.version
         data['build_machine'] = wlb.build_info.build_machine
+
+        if ms:
+            state = ms.MachineState(extended=False, anonymous=True)
+            state.generate()                        # generate subclasses
+            state.update()                          # read information
+            data["MachineState"] = str(state.get())
+        else:
+            print("MachineState module is not available. MachineState was not saved")
+
         sequenceValuesToScalars(data)
 
         result = data
@@ -150,8 +174,8 @@ class Scenario:
         table_name = table_name.replace("-", "_")  # - not allowed for table name would lead to syntax error
         for num_try in range(num_tries):
             try:
-                checkAndUpdateSchema(result, table_name, DB_FILE)
-                storeSingle(result, table_name, DB_FILE)
+                checkAndUpdateSchema(result, table_name, self.db_file_name)
+                storeSingle(result, table_name, self.db_file_name)
                 break
             except sqlite3.OperationalError as e:
                 wlb.log_warning(f"Sqlite DB writing failed: try {num_try + 1}/{num_tries}  {str(e)}")
@@ -200,12 +224,70 @@ def overlap_benchmark():
         scenarios.add(scenario)
 
 
+def no_overlap_scaling(cuda_enabled_mpi=False):
+    """Adds a single 'noOverlap' scenario with 256^3 cells per block, i.e. without communication hiding"""
+    wlb.log_info_on_root("Running scaling benchmark without communication hiding")
+    wlb.log_info_on_root("")
+
+    scenarios = wlb.ScenarioManager()
+    # no overlap
+    scenarios.add(Scenario(cells_per_block=(256, 256, 256),
+                           cuda_blocks=(128, 1, 1),
+                           time_step_strategy='noOverlap',
+                           inner_outer_split=(1, 1, 1),
+                           cuda_enabled_mpi=cuda_enabled_mpi,
+                           outer_iterations=1))
+
+
+def weak_scaling_overlap(cuda_enabled_mpi=False):
+    """Adds 'noOverlap' and 'simpleOverlap' scenarios with (WeakX, WeakY, WeakZ) cells per block"""
+    wlb.log_info_on_root("Running scaling benchmark with communication hiding")
+    wlb.log_info_on_root("")
+
+    scenarios = wlb.ScenarioManager()
+
+    # one scenario per time step strategy; results are stored in a dedicated database file
+    for t in ["noOverlap", "simpleOverlap"]:
+        scenarios.add(Scenario(cells_per_block=(WeakX, WeakY, WeakZ),
+                               cuda_blocks=(128, 1, 1),
+                               time_step_strategy=t,
+                               inner_outer_split=(8, 8, 8),
+                               cuda_enabled_mpi=cuda_enabled_mpi,
+                               outer_iterations=1,
+                               boundary_setup=True,
+                               db_file_name="weakScalingUniformGrid.sqlite3"))
+
+
+def strong_scaling_overlap(cuda_enabled_mpi=False):
+    wlb.log_info_on_root("Running strong scaling benchmark with one block per proc with communication hiding")
+    wlb.log_info_on_root("")
+
+    scenarios = wlb.ScenarioManager()
+    # NOTE(review): cells per block divide the domain by the *reversed* block counts - confirm this is intended
+    domain_size = (StrongX, StrongY, StrongZ)
+    blocks = block_decomposition(wlb.mpi.numProcesses())
+    cells_per_block = tuple([d // b for d, b in zip(domain_size, reversed(blocks))])
+
+    # one scenario per time step strategy; results are stored in a dedicated database file
+    for t in ["noOverlap", "simpleOverlap"]:
+        scenarios.add(Scenario(cells_per_block=cells_per_block,
+                               cuda_blocks=(128, 1, 1),
+                               time_step_strategy=t,
+                               inner_outer_split=(1, 1, 1),
+                               cuda_enabled_mpi=cuda_enabled_mpi,
+                               outer_iterations=1,
+                               timesteps=50,
+                               blocks=blocks,
+                               boundary_setup=True,
+                               db_file_name="strongScalingUniformGridOneBlock.sqlite3"))
+
+
 def single_gpu_benchmark():
     """Benchmarks only the LBM compute kernel"""
     wlb.log_info_on_root("Running single GPU benchmarks")
     wlb.log_info_on_root("")
 
-    gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 8))
+    gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 40))
     gpu_mem = gpu_mem_gb * (2 ** 30)
     gpu_type = os.environ.get('GPU_TYPE')
 
@@ -214,12 +296,8 @@ def single_gpu_benchmark():
         additional_info['gpu_type'] = gpu_type
 
     scenarios = wlb.ScenarioManager()
-    block_sizes = [(i, i, i) for i in (32, 64, 128, 256)]
-    cuda_blocks = [(32, 1, 1), (64, 1, 1), (128, 1, 1), (256, 1, 1), (512, 1, 1),
-                   (32, 2, 1), (64, 2, 1), (128, 2, 1), (256, 2, 1),
-                   (32, 4, 1), (64, 4, 1), (128, 4, 1),
-                   (32, 8, 1), (64, 8, 1),
-                   (32, 16, 1)]
+    block_sizes = [(i, i, i) for i in (128, 256, 320)]
+    cuda_blocks = [(128, 1, 1), ]
     for block_size in block_sizes:
         for cuda_block_size in cuda_blocks:
             # cuda_block_size = (256, 1, 1) and block_size = (64, 64, 64) would be cut to cuda_block_size = (64, 1, 1)
@@ -266,4 +344,14 @@ wlb.log_info_on_root(f"Batch run of benchmark scenarios, saving result to {DB_FI
 # performance of compute kernel (no communication)
 # overlap_benchmark()  # benchmarks different communication overlap options
 # profiling()  # run only two timesteps on a smaller domain for profiling only
-validation_run()
+# validation_run()
+
+if BENCHMARK == 0:
+    single_gpu_benchmark()
+elif BENCHMARK == 1:
+    weak_scaling_overlap(True)
+elif BENCHMARK == 2:
+    strong_scaling_overlap(True)
+else:
+    validation_run()
+
diff --git a/apps/showcases/Thermocapillary/benchmark.py b/apps/showcases/Thermocapillary/benchmark.py
index 6493eda5f56378c1d08d3814efcbbac6a7e57f09..a08e2688b72902aad30d672f1d1672235e17d8f6 100755
--- a/apps/showcases/Thermocapillary/benchmark.py
+++ b/apps/showcases/Thermocapillary/benchmark.py
@@ -6,6 +6,11 @@ import waLBerla as wlb
 from waLBerla.tools.config import block_decomposition
 from waLBerla.tools.sqlitedb import sequenceValuesToScalars
 
+try:
+    import machinestate as ms
+except ImportError:
+    ms = None
+
 
 def num_time_steps(block_size, time_steps_for_256_block=50):
     # Number of time steps run for a workload of 256^3 cells per process
@@ -137,6 +142,14 @@ class Scenario:
         data['walberla_version'] = wlb.build_info.version
         data['build_machine'] = wlb.build_info.build_machine
 
+        if ms:
+            state = ms.MachineState(extended=False, anonymous=True)
+            state.generate()                        # generate subclasses
+            state.update()                          # read information
+            data["MachineState"] = str(state.get())
+        else:
+            print("MachineState module is not available. MachineState was not saved")
+
         sequenceValuesToScalars(data)
 
         csv_file = f"thermocapillary_benchmark.csv"
diff --git a/extern/pybind11 b/extern/pybind11
index 7c33cdc2d39c7b99a122579f53bc94c8eb3332ff..f7b499615e14d70ab098a20deb0cdb3889998a1a 160000
--- a/extern/pybind11
+++ b/extern/pybind11
@@ -1 +1 @@
-Subproject commit 7c33cdc2d39c7b99a122579f53bc94c8eb3332ff
+Subproject commit f7b499615e14d70ab098a20deb0cdb3889998a1a
diff --git a/python/lbmpy_walberla/packing_kernels.py b/python/lbmpy_walberla/packing_kernels.py
index 8a8728031cd4eb56ba4366e25ef6fd85b1f2e1b5..862a6cdaa48e4bb9696dcc926824161f7c46ed94 100644
--- a/python/lbmpy_walberla/packing_kernels.py
+++ b/python/lbmpy_walberla/packing_kernels.py
@@ -7,8 +7,10 @@ import sympy as sp
 from jinja2 import Environment, PackageLoader, StrictUndefined
 
 from pystencils import Assignment, CreateKernelConfig, create_kernel, Field, FieldType, fields, Target
+from pystencils.astnodes import LoopOverCoordinate
+from pystencils.integer_functions import int_div
 from pystencils.stencil import offset_to_direction_string
-from pystencils.typing import TypedSymbol
+from pystencils.typing import TypedSymbol, BasicType, PointerType, FieldPointerSymbol
 from pystencils.stencil import inverse_direction
 from pystencils.bit_masks import flag_cond
 
@@ -18,7 +20,7 @@ from lbmpy.enums import Stencil
 from lbmpy.stencils import LBStencil
 
 from pystencils_walberla.cmake_integration import CodeGenerationContext
-from pystencils_walberla.kernel_selection import KernelFamily, KernelCallNode, SwitchNode
+from pystencils_walberla.kernel_selection import KernelFamily, KernelCallNode, SwitchNode, AbortNode
 from pystencils_walberla.jinja_filters import add_pystencils_filters_to_jinja_env
 from pystencils_walberla.utility import config_from_context
 
@@ -101,6 +103,23 @@ class PackingKernelsCodegen:
         self.accessors = [get_accessor(streaming_pattern, t) for t in get_timesteps(streaming_pattern)]
         self.mask_field = fields(f'mask : uint32 [{self.dim}D]', layout=src_field.layout)
 
+        self.block_wise = True
+        if not self.inplace or not self.config.target == Target.GPU:
+            self.block_wise = False
+
+        self.index = TypedSymbol("index", dtype=BasicType(np.int64))
+        self.index_shape = TypedSymbol("_size_0", dtype=BasicType(np.int64))
+        self.src_ptr_type = PointerType(self.src_field.dtype, const=True, restrict=True, double_pointer=True)
+        self.src_ptr = FieldPointerSymbol(self.src_field.name, self.src_field.dtype, const=True)
+        self.dst_ptr_type = PointerType(self.dst_field.dtype, const=False, restrict=True, double_pointer=True)
+        self.dst_ptr = FieldPointerSymbol(self.dst_field.name, self.dst_field.dtype, const=False)
+
+        self.data_src = TypedSymbol(f"_data_{self.src_field.name}_dp", dtype=self.src_ptr_type)
+        self.data_dst = TypedSymbol(f"_data_{self.dst_field.name}_dp", dtype=self.dst_ptr_type)
+
+        self.f = sp.IndexedBase(self.data_src, shape=self.index_shape)
+        self.d = sp.IndexedBase(self.data_dst, shape=self.index_shape)
+
     def create_uniform_kernel_families(self, kernels_dict=None):
         kernels = dict() if kernels_dict is None else kernels_dict
 
@@ -115,6 +134,8 @@ class PackingKernelsCodegen:
 
     def create_nonuniform_kernel_families(self, kernels_dict=None):
         kernels = dict() if kernels_dict is None else kernels_dict
+        kernels['localCopyRedistribute'] = self.get_local_copy_redistribute_kernel_family()
+        kernels['localPartialCoalescence'] = self.get_local_copy_partial_coalescence_kernel_family()
         kernels['unpackRedistribute'] = self.get_unpack_redistribute_kernel_family()
         kernels['packPartialCoalescence'] = self.get_pack_partial_coalescence_kernel_family()
         kernels['zeroCoalescenceRegion'] = self.get_zero_coalescence_region_kernel_family()
@@ -231,7 +252,10 @@ class PackingKernelsCodegen:
         dir_string = offset_to_direction_string(comm_dir)
         streaming_dirs = self.get_streaming_dirs(comm_dir)
         src, dst = self._stream_out_accs(timestep)
-        assignments = []
+        assignments = list()
+        if self.block_wise:
+            assignments.append(Assignment(self.src_ptr, self.f[self.index]))
+            assignments.append(Assignment(self.dst_ptr, self.d[self.index]))
         dir_indices = sorted(self.stencil.index(d) for d in streaming_dirs)
         if len(dir_indices) == 0:
             return None
@@ -283,15 +307,59 @@ class PackingKernelsCodegen:
         return create_kernel(assignments, config=config)
 
     def get_unpack_redistribute_kernel_family(self):
-        return self._construct_directionwise_kernel_family(self.get_unpack_redistribute_ast)
+        return self._construct_directionwise_kernel_family(self.get_unpack_redistribute_ast,
+                                                           exclude_time_step=Timestep.EVEN)  # EVEN -> AbortNode
 
     def get_local_copy_redistribute_ast(self, comm_dir, timestep):
-        #   TODO
-        raise NotImplementedError()
+        assert not all(d == 0 for d in comm_dir)  # comm_dir needs at least one non-zero component
+        ctr = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(self.stencil.D)]  # loop counter per dim
+
+        dir_string = offset_to_direction_string(comm_dir)
+        streaming_dirs = self.get_streaming_dirs(inverse_direction(comm_dir))
+        dir_indices = sorted(self.stencil.index(d) for d in streaming_dirs)
+        if len(dir_indices) == 0:  # no stencil direction selected: None is returned instead of a kernel
+            return None
+
+        # for inplace streaming the dst (fine grid) must always be on odd state
+        dst_timestep = Timestep.ODD if self.inplace else Timestep.BOTH
+
+        _, dst = self._stream_out_accs(dst_timestep)
+        src, _ = self._stream_out_accs(timestep)
+
+        src_abs = self.src_field.new_field_with_different_name(self.src_field.name)  # used for absolute_access()
+        src_abs.field_type = FieldType.CUSTOM
+
+        orthos = self.orthogonal_principals(comm_dir)
+        sub_dirs = self.contained_principals(comm_dir)
+        orthogonal_combinations = self.linear_combinations(orthos)
+        subdir_combinations = self.linear_combinations_nozero(sub_dirs)
+        second_gl_dirs = [o + s for o, s in product(orthogonal_combinations, subdir_combinations)]
+        negative_dir_correction = np.array([(1 if d == -1 else 0) for d in comm_dir])
+        assignments = []
+        for offset in orthogonal_combinations:  # at these offsets all values of a cell are written
+            o = offset + negative_dir_correction  # dst shift ('o' is rebound locally in the comprehension)
+            for d in range(self.values_per_cell):
+                field_acc = dst[d].get_shifted(*o)
+                src_access = [int_div(ctr[i], 2) + o for i, o in enumerate(src[d].offsets)]
+                assignments.append(Assignment(field_acc, src_abs.absolute_access(src_access, (d, ))))
+
+        for offset in second_gl_dirs:  # at these offsets only the directions in dir_indices are written
+            o = offset + negative_dir_correction
+            for d in dir_indices:
+                field_acc = dst[d].get_shifted(*o)
+                src_access = [int_div(ctr[i], 2) + o for i, o in enumerate(src[d].offsets)]
+                assignments.append(Assignment(field_acc, src_abs.absolute_access(src_access, (d, ))))
+
+        function_name = f'localCopyRedistribute_{dir_string}' + timestep_suffix(timestep)
+        iteration_slice = tuple(slice(None, None, 2) for _ in range(self.dim))  # every second cell per dimension
+        config = CreateKernelConfig(function_name=function_name, iteration_slice=iteration_slice,
+                                    data_type=self.data_type, ghost_layers=0, allow_double_writes=True,
+                                    cpu_openmp=self.config.cpu_openmp, target=self.config.target)
+
+        return create_kernel(assignments, config=config)
 
     def get_local_copy_redistribute_kernel_family(self):
-        #   TODO
-        raise NotImplementedError()
+        return self._construct_directionwise_kernel_family(self.get_local_copy_redistribute_ast)
 
     # --------------------------- Pack / Unpack / LocalCopy Fine to Coarse ---------------------------------------------
 
@@ -322,7 +390,8 @@ class PackingKernelsCodegen:
         return ast
 
     def get_pack_partial_coalescence_kernel_family(self):
-        return self._construct_directionwise_kernel_family(self.get_pack_partial_coalescence_ast)
+        return self._construct_directionwise_kernel_family(self.get_pack_partial_coalescence_ast,
+                                                           exclude_time_step=Timestep.ODD)  # ODD -> AbortNode
 
     def get_unpack_coalescence_ast(self, comm_dir, timestep):
         config = replace(self.config, ghost_layers=0)
@@ -370,12 +439,53 @@ class PackingKernelsCodegen:
     def get_zero_coalescence_region_kernel_family(self):
         return self._construct_directionwise_kernel_family(self.get_zero_coalescence_region_ast)
 
-    #   TODO
     def get_local_copy_partial_coalescence_ast(self, comm_dir, timestep):
-        raise NotImplementedError()
+        assert not all(d == 0 for d in comm_dir)  # comm_dir needs at least one non-zero component
+        ctr = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(self.stencil.D)]  # loop counter per dim
+
+        dir_string = offset_to_direction_string(comm_dir)
+        streaming_dirs = self.get_streaming_dirs(comm_dir)
+        dir_indices = sorted(self.stencil.index(d) for d in streaming_dirs)
+
+        if len(dir_indices) == 0:  # no stencil direction selected: None is returned instead of a kernel
+            return None
+        buffer = sp.symbols(f"b_:{self.values_per_cell}")
+
+        # for inplace streaming the src (fine grid) is read with Timestep.ODD accessors - TODO confirm
+        src_timestep = Timestep.ODD if self.inplace else Timestep.BOTH
+
+        src, _ = self._stream_in_accs(src_timestep)
+        _, dst = self._stream_in_accs(timestep.next())
+        mask = self.mask_field
+
+        dst_abs = self.dst_field.new_field_with_different_name(self.dst_field.name)  # used for absolute_access()
+        dst_abs.field_type = FieldType.CUSTOM
+
+        coalescence_factor = sp.Rational(1, 2 ** self.dim)  # 1 / 2^dim
+
+        offsets = list(product(*((0, 1) for _ in comm_dir)))  # all 0/1 combinations, one entry per component
+        assignments = []
+        for i, d in enumerate(dir_indices):
+            acc = 0
+            for o in offsets:
+                acc += flag_cond(d, mask[o], src[d].get_shifted(*o))
+            assignments.append(Assignment(buffer[i], acc))
+
+        for i, d in enumerate(dir_indices):  # outer 'i' indexes buffer; the comprehension below has its own 'i'
+            index = dst[d].index
+            dst_access = [int_div(ctr[i], 2) + o for i, o in enumerate(dst[d].offsets)]
+            assignments.append(Assignment(dst_abs.absolute_access(dst_access, index),
+                                          dst_abs.absolute_access(dst_access, index) + coalescence_factor * buffer[i]))
+
+        iteration_slice = tuple(slice(None, None, 2) for _ in range(self.dim))  # every second cell per dimension
+        config = replace(self.config, iteration_slice=iteration_slice, ghost_layers=0)
+
+        ast = create_kernel(assignments, config=config)
+        ast.function_name = f'localPartialCoalescence_{dir_string}' + timestep_suffix(timestep)
+        return ast
 
     def get_local_copy_partial_coalescence_kernel_family(self):
-        raise NotImplementedError()
+        return self._construct_directionwise_kernel_family(self.get_local_copy_partial_coalescence_ast)
 
     # ------------------------------------------ Utility ---------------------------------------------------------------
 
@@ -425,7 +535,7 @@ class PackingKernelsCodegen:
 
     # --------------------------- Private Members ----------------------------------------------------------------------
 
-    def _construct_directionwise_kernel_family(self, create_ast_callback):
+    def _construct_directionwise_kernel_family(self, create_ast_callback, exclude_time_step=None):
         subtrees = []
         direction_symbol = TypedSymbol('dir', dtype='stencil::Direction')
         for t in get_timesteps(self.streaming_pattern):
@@ -439,7 +549,10 @@ class PackingKernelsCodegen:
                     continue
                 kernel_call = KernelCallNode(ast)
                 cases_dict[f"stencil::{dir_string}"] = kernel_call
-            subtrees.append(SwitchNode(direction_symbol, cases_dict))
+            if exclude_time_step is not None and t == exclude_time_step:
+                subtrees.append(AbortNode("This function can not be called! Please contact the waLBerla team"))
+            else:
+                subtrees.append(SwitchNode(direction_symbol, cases_dict))
 
         if not self.inplace:
             tree = subtrees[0]
diff --git a/python/lbmpy_walberla/storage_specification.py b/python/lbmpy_walberla/storage_specification.py
index 60fd96d242bdf0c9513a1ff3d7d99efdc5898ec3..ae56ff735896d92c5fba4e5f462cdcdde21f2395 100644
--- a/python/lbmpy_walberla/storage_specification.py
+++ b/python/lbmpy_walberla/storage_specification.py
@@ -113,8 +113,8 @@ def generate_lbm_storage_specification(generation_context: CodeGenerationContext
         'kernels': kernels,
         'direction_sizes': cg.get_direction_sizes(),
         'src_field': cg.src_field,
-        'dst_field': cg.dst_field
-
+        'dst_field': cg.dst_field,
+        'block_wise': cg.block_wise
     }
     if nonuniform:
         jinja_context['mask_field'] = cg.mask_field
diff --git a/python/lbmpy_walberla/sweep_collection.py b/python/lbmpy_walberla/sweep_collection.py
index bc8bdda49dcb88f897f7fa1ce23c9a9b101660a3..164dd94d30dd1fcc53ea1eedcc880cc1dede7191 100644
--- a/python/lbmpy_walberla/sweep_collection.py
+++ b/python/lbmpy_walberla/sweep_collection.py
@@ -1,13 +1,17 @@
 from dataclasses import replace
 from typing import Dict
 
+from jinja2 import Environment, PackageLoader, StrictUndefined
+
 import sympy as sp
 import numpy as np
 
-from pystencils import Target, create_kernel
+from pystencils import Target, create_kernel, Assignment
+from pystencils.bit_masks import flag_cond
 from pystencils.config import CreateKernelConfig
-from pystencils.field import Field
+from pystencils.field import Field, fields
 from pystencils.simp import add_subexpressions_for_field_reads
+from pystencils.typing import BasicType, PointerType, FieldPointerSymbol, TypedSymbol, CastFunc
 
 from lbmpy.advanced_streaming import is_inplace, get_accessor, Timestep
 from lbmpy.creationfunctions import LbmCollisionRule, LBMConfig, LBMOptimisation
@@ -17,8 +21,8 @@ from lbmpy.updatekernels import create_lbm_kernel, create_stream_only_kernel
 
 from pystencils_walberla.kernel_selection import KernelCallNode, KernelFamily
 from pystencils_walberla.utility import config_from_context
-from pystencils_walberla import generate_sweep_collection
-from lbmpy_walberla.utility import create_pdf_field
+from pystencils_walberla.jinja_filters import add_pystencils_filters_to_jinja_env
+from lbmpy_walberla.utility import create_pdf_field, timestep_suffix
 
 from .alternating_sweeps import EvenIntegerCondition
 from .function_generator import kernel_family_function_generator
@@ -59,7 +63,6 @@ def generate_lbm_sweep_collection(ctx, class_name: str, collision_rule: LbmColli
                                          field_layout=lbm_optimisation.field_layout)
 
     config = replace(config, ghost_layers=0)
-
     function_generators = []
 
     def family(name):
@@ -68,6 +71,13 @@ def generate_lbm_sweep_collection(ctx, class_name: str, collision_rule: LbmColli
     def generator(name, kernel_family):
         return kernel_family_function_generator(name, kernel_family, namespace='lbm', max_threads=max_threads)
 
+    all_fields = collision_rule.bound_fields.union(collision_rule.free_fields)
+    all_fields.update({src_field, dst_field})
+    all_fields = list(sorted(all_fields, key=lambda e: str(e)))
+
+    bw_stream_collide = block_wise_stream_collide(class_name, collision_rule, lbm_config, src_field, dst_field, config)
+    bw_stream = block_wise_stream(class_name, collision_rule, lbm_config, src_field, dst_field, config)
+
     function_generators.append(generator('streamCollide', family("streamCollide")))
     function_generators.append(generator('collide', family("collide")))
     function_generators.append(generator('stream', family("stream")))
@@ -87,7 +97,65 @@ def generate_lbm_sweep_collection(ctx, class_name: str, collision_rule: LbmColli
                                                         namespace='lbm', max_threads=max_threads)
     function_generators.append(getter_generator)
 
-    generate_sweep_collection(ctx, class_name, function_generators, refinement_scaling)
+    contexts_function_generators = list()
+    for fct in function_generators:
+        contexts_function_generators.append(fct())
+
+    namespaces = set([context['namespace'] for context in contexts_function_generators])
+    assert len(namespaces) == 1, "All function_generators must output the same namespace!"
+    namespace = namespaces.pop()
+
+    headers = set()
+    for context in contexts_function_generators:
+        for header in context['interface_spec'].headers:
+            headers.add(header)
+        for header in context['kernel'].get_headers():
+            headers.add(header)
+
+    kernel_list = list()
+    for context in contexts_function_generators:
+        kernel_list.append(context['kernel'])
+
+    kernels = list()
+    for context in contexts_function_generators:
+        kernels.append({
+            'kernel': context['kernel'],
+            'function_name': context['function_name'],
+            'ghost_layers_to_include': 'ghost_layers',
+            'field': context['field'],
+            'max_threads': context['max_threads']
+        })
+
+    target = kernels[0]['kernel'].target
+
+    jinja_context = {
+        'block_stream_collide': bw_stream_collide,
+        'block_stream': bw_stream,
+        'all_fields': all_fields,
+        'pdf_field': src_field,
+        'kernel_list': kernel_list,
+        'kernels': kernels,
+        'namespace': namespace,
+        'class_name': class_name,
+        'headers': headers,
+        'target': target.name.lower(),
+        'is_gpu': target == Target.GPU,
+        'parameter_scaling': refinement_scaling,
+        'stencil_name': lbm_config.stencil.name,
+        'D': lbm_config.stencil.D,
+        'Q': lbm_config.stencil.Q,
+        'inplace': is_inplace(lbm_config.streaming_pattern)
+    }
+
+    env = Environment(loader=PackageLoader('lbmpy_walberla'), undefined=StrictUndefined)
+    add_pystencils_filters_to_jinja_env(env)
+
+    header = env.get_template("LBMSweepCollection.tmpl.h").render(**jinja_context)
+    source = env.get_template("LBMSweepCollection.tmpl.cpp").render(**jinja_context)
+
+    source_extension = "cu" if target == Target.GPU and ctx.cuda else "cpp"
+    ctx.write_file(f"{class_name}.h", header)
+    ctx.write_file(f"{class_name}.{source_extension}", source)
 
 
 class RefinementScaling:
@@ -175,7 +243,7 @@ def get_setter_family(class_name, lb_method, pdfs, streaming_pattern, macroscopi
 
     default_dtype = config.data_type.default_factory()
 
-    get_timestep = {"field_name": pdfs.name, "function": "getTimestep"}
+    get_timestep = {"field_name": pdfs.name, "function": "getTimestepPlusOne"}
     temporary_fields = ()
     field_swaps = ()
 
@@ -252,3 +320,104 @@ def get_getter_family(class_name, lb_method, pdfs, streaming_pattern, macroscopi
         family = KernelFamily(node, class_name, temporary_fields=temporary_fields, field_swaps=field_swaps)
 
     return family
+
+
+def block_wise_stream_collide(class_name, collision_rule, lbm_config, src_field, dst_field, config):
+    """Assemble the jinja context of the block-wise stream-collide kernel family.
+
+    Returns ``None`` if the streaming pattern of ``lbm_config`` is not in-place.
+    """
+    if not is_inplace(lbm_config.streaming_pattern):
+        return None
+
+    # One AST per timestep parity; the trailing ``False`` requests stream-collide rather than stream-only.
+    even_ast, kernel_fields = create_block_wise_ast(collision_rule, src_field, dst_field,
+                                                    lbm_config, Timestep.EVEN, config, False)
+    call_on_even = KernelCallNode(even_ast)
+    odd_ast, _ = create_block_wise_ast(collision_rule, src_field, dst_field,
+                                       lbm_config, Timestep.ODD, config, False)
+    call_on_odd = KernelCallNode(odd_ast)
+    # Branch between the two calls on the 'timestep' argument (uint8).
+    selection_tree = EvenIntegerCondition('timestep', call_on_even, call_on_odd, parameter_dtype=np.uint8)
+
+    # Maps each field name to the name of its pointer-array kernel argument.
+    dp_argument_names = {fld.name: f"_data_{fld.name}_dp" for fld in kernel_fields}
+
+    return {
+        'kernel': KernelFamily(selection_tree, class_name),
+        'all_fields': kernel_fields,
+        'namespace': 'lbm',
+        'function_name': 'blockStreamCollide',
+        'indexed_to_field_name': dp_argument_names,
+        'max_threads': None
+    }
+
+
+def block_wise_stream(class_name, collision_rule, lbm_config, src_field, dst_field, config):
+    """Assemble the jinja context of the block-wise stream-only kernel family.
+
+    Returns ``None`` if the streaming pattern of ``lbm_config`` is not in-place.
+    """
+    if not is_inplace(lbm_config.streaming_pattern):
+        return None
+
+    # One AST per timestep parity; the trailing ``True`` requests the stream-only update.
+    even_ast, kernel_fields = create_block_wise_ast(collision_rule, src_field, dst_field,
+                                                    lbm_config, Timestep.EVEN, config, True)
+    call_on_even = KernelCallNode(even_ast)
+    odd_ast, _ = create_block_wise_ast(collision_rule, src_field, dst_field,
+                                       lbm_config, Timestep.ODD, config, True)
+    call_on_odd = KernelCallNode(odd_ast)
+    # Branch between the two calls on the 'timestep' argument (uint8).
+    selection_tree = EvenIntegerCondition('timestep', call_on_even, call_on_odd, parameter_dtype=np.uint8)
+
+    # Maps each field name to the name of its pointer-array kernel argument.
+    dp_argument_names = {fld.name: f"_data_{fld.name}_dp" for fld in kernel_fields}
+
+    return {
+        'kernel': KernelFamily(selection_tree, class_name),
+        'all_fields': kernel_fields,
+        'namespace': 'lbm',
+        'function_name': 'blockStream',
+        'indexed_to_field_name': dp_argument_names,
+        'max_threads': None
+    }
+
+
+def create_block_wise_ast(collision_rule, src_field, dst_field, lbm_config, timestep, config, stream_only):
+    """Create the kernel AST of a block-wise (stream-only or stream-collide) update for one timestep.
+
+    Returns ``(ast, fields)``, where ``fields`` holds all fields of the update rule sorted by name.
+    """
+    # Query the default data type before ``config`` is rebound to its modified copy.
+    value_dtype = config.data_type.default_factory()
+    config = replace(config, gpu_indexing_params={})
+
+    accessor = get_accessor(lbm_config.streaming_pattern, timestep)
+    if stream_only:
+        rule = create_stream_only_kernel(lbm_config.stencil, src_field, dst_field, accessor)
+    else:
+        rule = create_lbm_kernel(collision_rule, src_field, dst_field, accessor, data_type=value_dtype)
+
+    free_fields = rule.free_fields
+    sorted_fields = sorted(rule.bound_fields.union(free_fields), key=lambda fld: fld.name)
+
+    block_index = TypedSymbol("index", dtype=BasicType(np.int64))
+    index_extent = TypedSymbol("_size_0", dtype=BasicType(np.int64))
+
+    # Per field: <field pointer> = _data_<name>_dp[index]; pointers of free fields are const-qualified.
+    pointer_loads = []
+    for fld in sorted_fields:
+        is_const = fld in free_fields
+        dp_type = PointerType(fld.dtype, const=is_const, restrict=True, double_pointer=True)
+        dp_array = sp.IndexedBase(TypedSymbol(f"_data_{fld.name}_dp", dtype=dp_type), shape=index_extent)
+        base_pointer = FieldPointerSymbol(fld.name, fld.dtype, const=is_const)
+        pointer_loads.append(Assignment(base_pointer, dp_array[block_index]))
+
+    # The pointer loads are placed in front of the assignments of the update rule.
+    ast = create_kernel(pointer_loads + rule.all_assignments, config=config)
+
+    kernel_prefix = "kernel_BlockStream" if stream_only else "kernel_BlockStreamCollide"
+    ast.function_name = kernel_prefix + timestep_suffix(timestep)
+    ast.assumed_inner_stride_one = config.cpu_vectorize_info['assume_inner_stride_one']
+    return ast, sorted_fields
diff --git a/python/lbmpy_walberla/templates/BoundaryCollection.tmpl.h b/python/lbmpy_walberla/templates/BoundaryCollection.tmpl.h
index 453f78e093a5087bd80a3bf37dd1b20300558f3a..4e19d0692e724cbdea2644b0334823de0299c588 100644
--- a/python/lbmpy_walberla/templates/BoundaryCollection.tmpl.h
+++ b/python/lbmpy_walberla/templates/BoundaryCollection.tmpl.h
@@ -41,12 +41,12 @@ class {{class_name}}
    enum Type { ALL = 0, INNER = 1, OUTER = 2 };
 
 
-   {{class_name}}( {{- ["const shared_ptr<StructuredBlockForest> & blocks", "BlockDataID flagID_", "BlockDataID pdfsID_", "FlagUID domainUID_", [kernel_list|generate_constructor_parameters(['indexVector', 'indexVectorSize', 'pdfs'])], additional_constructor_arguments] | type_identifier_list -}} )
+   {{class_name}}( {{- ["const shared_ptr<StructuredBlockForest> & blocks", "BlockDataID flagID_", "BlockDataID pdfsID_", "FlagUID domainUID_", [kernel_list|generate_constructor_parameters(['indexVector', 'indexVectorSize', 'forceVector', 'forceVectorSize', 'pdfs'])], additional_constructor_arguments] | type_identifier_list -}} )
       : blocks_(blocks), flagID(flagID_), pdfsID(pdfsID_), domainUID(domainUID_)
    {
       {% for object_name, boundary_class, kernel, additional_data_handler in zip(object_names, boundary_classes, kernel_list, additional_data_handlers) -%}
 
-      {{object_name}} = std::make_shared< {{boundary_class}} >({{- ["blocks", [kernel|generate_function_collection_call(['indexVector', 'indexVectorSize', 'timestep', 'gpuStream'], use_field_ids=True)], additional_data_handler.constructor_argument_name] | type_identifier_list -}});
+      {{object_name}} = std::make_shared< {{boundary_class}} >({{- ["blocks", "pdfsID", [kernel|generate_function_collection_call(['indexVector', 'indexVectorSize', 'forceVector', 'forceVectorSize', 'pdfs', 'timestep', 'gpuStream'], use_field_ids=True)], additional_data_handler.constructor_argument_name] | type_identifier_list -}});
       {% endfor %}
 
       {% for object_name, flag_uid in zip(object_names, flag_uids) -%}
diff --git a/python/lbmpy_walberla/templates/LBMSweepCollection.tmpl.cpp b/python/lbmpy_walberla/templates/LBMSweepCollection.tmpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a691af5d128882cf83dced5c094ee78ed53c2ee4
--- /dev/null
+++ b/python/lbmpy_walberla/templates/LBMSweepCollection.tmpl.cpp
@@ -0,0 +1,156 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file {{class_name}}.cpp
+//! \\author pystencils
+//======================================================================================================================
+#include "{{class_name}}.h"
+
+{% if target is equalto 'cpu' -%}
+#define FUNC_PREFIX
+{%- elif target is equalto 'gpu' -%}
+#define FUNC_PREFIX __global__
+{%- endif %}
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic push
+#   pragma GCC diagnostic ignored "-Wunused-variable"
+#   pragma GCC diagnostic ignored "-Wignored-qualifiers"
+#endif
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_INTEL )
+#pragma warning push
+#pragma warning( disable :  1599 )
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 191
+#else
+#pragma diag_suppress 191
+#endif
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace {{namespace}} {
+
+{%if block_stream_collide -%}
+{{block_stream_collide['kernel']|generate_definitions(target, block_stream_collide['max_threads'])}}
+{{block_stream['kernel']|generate_definitions(target, block_stream['max_threads'])}}
+{%endif%}
+
+{% for kernel in kernels %}
+{{kernel['kernel']|generate_definitions(target, kernel['max_threads'])}}
+{% endfor %}
+
+void {{class_name}}::blockStreamCollide({{- ["[[maybe_unused]] uint_t level", "[[maybe_unused]] uint8_t timestep", ["[[maybe_unused]] gpuStream_t stream"] if target == 'gpu' else []] | type_identifier_list -}})
+{
+   {%if block_stream_collide -%}
+   {#- Body is rendered only if a block-wise kernel context exists: it reads the per-level pointer arrays, sizes and (on GPU) the launch configuration from members, then emits a plain kernel call. -#}
+   {%if target is equalto 'gpu' -%}
+   dim3 _grid = grid_[level];
+   dim3 _block = block_[level];
+
+   {%- for field in block_stream_collide['all_fields'] %}
+   {{field.dtype.c_name}} ** {{block_stream_collide['indexed_to_field_name'][field.name]}} = {{field.name}}PointersGPU[level];
+   {%- endfor %}
+
+   {% else %}
+
+   {%- for field in block_stream_collide['all_fields'] %}
+   {{field.dtype.c_name}} ** {{block_stream_collide['indexed_to_field_name'][field.name]}} = {{field.name}}Pointers[level].data();
+   {%- endfor %}
+
+   {%- endif %}
+   const int64_t _size_0 = size_0[level];
+   int64_t _size_{{block_stream_collide['all_fields'][0].name}}_0 = size_1;
+   int64_t _size_{{block_stream_collide['all_fields'][0].name}}_1 = size_2;
+   int64_t _size_{{block_stream_collide['all_fields'][0].name}}_2 = size_3;
+
+   {{block_stream_collide['kernel']|generate_field_strides()|indent(3)}}
+   {{block_stream_collide['kernel']|generate_refs_for_kernel_parameters(prefix="this->", parameters_to_ignore=["_size_0"], ignore_fields=True, parameter_registration=parameter_scaling, level_known=True)|indent(3)}}
+   {{block_stream_collide['kernel']|generate_call(stream='stream', plain_kernel_call=True)|indent(3)}}
+
+   {%endif%}
+}
+
+void {{class_name}}::ghostLayerPropagation({{- ["[[maybe_unused]] uint_t level", "[[maybe_unused]] uint8_t timestep", ["[[maybe_unused]] gpuStream_t stream"] if target == 'gpu' else []] | type_identifier_list -}})
+{
+   {%if block_stream_collide -%}
+   {#- Iterates the entries of glPropagationPDFs[level]: each key supplies the three field extents, each value the pointer array handed to the block_stream kernel; empty entries are skipped. -#}
+   {{block_stream['kernel']|generate_field_strides()|indent(3)}}
+
+   {%if target is equalto 'gpu' -%}
+   auto parallelSection_ = parallelStreams_.parallelSection( stream );
+   for (auto it = glPropagationPDFs[level].begin(); it != glPropagationPDFs[level].end(); it++){
+      if(it->second.empty()){ continue;}
+
+      int64_t _size_0 = int64_c(it->second.size());
+      int64_t _size_{{pdf_field.name}}_0 = std::get<0>(it->first);
+      int64_t _size_{{pdf_field.name}}_1 = std::get<1>(it->first);
+      int64_t _size_{{pdf_field.name}}_2 = std::get<2>(it->first);
+
+      {{pdf_field.dtype.c_name}} ** _data_{{pdf_field.name}}_dp = glPropagationPDFsGPU[level][it->first];
+      dim3 _grid = glPropagationGrid_[level][it->first];
+      dim3 _block = glPropagationBlock_[level][it->first];
+      parallelSection_.run([&]( auto s ) {
+      {{block_stream['kernel']|generate_call(stream='s', plain_kernel_call=True)|indent(9)}}
+      });
+   }
+
+   {% else %}
+
+   for (auto it = glPropagationPDFs[level].begin(); it != glPropagationPDFs[level].end(); it++){
+      if(it->second.empty()){ continue;}
+
+      int64_t _size_0 = int64_c(it->second.size());
+      int64_t _size_{{pdf_field.name}}_0 = std::get<0>(it->first);
+      int64_t _size_{{pdf_field.name}}_1 = std::get<1>(it->first);
+      int64_t _size_{{pdf_field.name}}_2 = std::get<2>(it->first);
+
+      {{pdf_field.dtype.c_name}} ** _data_{{pdf_field.name}}_dp = it->second.data();
+      {{block_stream['kernel']|generate_call(stream='s', plain_kernel_call=True)|indent(6)}}
+   }
+   {%- endif %}
+
+   {%endif%}
+}
+
+{% for kernel in kernels %}{# Per entry of 'kernels': one member function taking ghost layers and one <function_name>CellInterval variant taking a cell interval 'ci'. #}
+void {{class_name}}::{{kernel['function_name']}}( {{kernel['kernel']|generate_plain_parameter_list(ghost_layers=True)}} )
+{
+   {{kernel['kernel']|generate_call(ghost_layers_to_include=kernel['ghost_layers_to_include'], stream='stream')|indent(3)}}
+}
+void {{class_name}}::{{kernel['function_name']}}CellInterval( {{kernel['kernel']|generate_plain_parameter_list(cell_interval='ci')}})
+{
+   {{kernel['kernel']|generate_call(stream='stream', cell_interval='ci')|indent(3)}}
+}
+{% endfor %}
+
+
+} // namespace {{namespace}}
+} // namespace walberla
+
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic pop
+#endif
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_INTEL )
+#pragma warning pop
+#endif
diff --git a/python/lbmpy_walberla/templates/LBMSweepCollection.tmpl.h b/python/lbmpy_walberla/templates/LBMSweepCollection.tmpl.h
new file mode 100644
index 0000000000000000000000000000000000000000..9471b6f29bdf7951199b34b87e593fb4622e46d2
--- /dev/null
+++ b/python/lbmpy_walberla/templates/LBMSweepCollection.tmpl.h
@@ -0,0 +1,511 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file {{class_name}}.h
+//! \\author pystencils
+//======================================================================================================================
+
+#pragma once
+
+#include "blockforest/StructuredBlockForest.h"
+#include "blockforest/BlockID.h"
+#include "blockforest/Block.h"
+
+#include "core/DataTypes.h"
+#include "core/logging/Logging.h"
+#include "core/Macros.h"
+
+#include "field/AddToStorage.h"
+#include "field/FlagField.h"
+#include "field/iterators/FieldIterator.h"
+
+{% if target is equalto 'gpu' -%}
+#include "gpu/AddGPUFieldToStorage.h"
+#include "gpu/GPUField.h"
+#include "gpu/ParallelStreams.h"
+{%- endif %}
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+
+#include "field/SwapableCompare.h"
+#include "field/GhostLayerField.h"
+
+#include "stencil/Directions.h"
+#include "stencil/{{stencil_name}}.h"
+
+#include <set>
+#include <cmath>
+
+{% for header in headers %}
+#include {{header}}
+{% endfor %}
+
+using namespace std::placeholders;
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic push
+#   pragma GCC diagnostic ignored "-Wunused-parameter"
+#   pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace {{namespace}} {
+
+class {{class_name}}
+{
+ public:
+   enum Type { ALL = 0, INNER = 1, OUTER = 2 };
+   using sizeTuple = std::tuple<int64_t, int64_t, int64_t>;
+
+   {{class_name}}(const shared_ptr< StructuredBlockForest > & blocks, {{kernel_list|generate_constructor_parameters}}, const Cell & outerWidth=Cell(1, 1, 1))
+      : blocks_(blocks), {{ kernel_list|generate_constructor_initializer_list(parameter_registration=parameter_scaling) }}, outerWidth_(outerWidth)
+   {
+      // validInnerOuterSplit_ ends up false if any block in *blocks has at most 2 * outerWidth cells along an axis.
+      {{kernel_list|generate_constructor(parameter_registration=parameter_scaling) |indent(6)}}
+      validInnerOuterSplit_ = true;
+
+      for (auto& iBlock : *blocks)
+      {
+         if (int_c(blocks->getNumberOfXCells(iBlock)) <= outerWidth_[0] * 2 ||
+             int_c(blocks->getNumberOfYCells(iBlock)) <= outerWidth_[1] * 2 ||
+             int_c(blocks->getNumberOfZCells(iBlock)) <= outerWidth_[2] * 2)
+            validInnerOuterSplit_ = false;
+      }
+   }
+
+   void initialiseBlockPointer()
+   {
+      {%if block_stream_collide -%}
+      blockWise_ = true;
+
+      size_0.resize(blocks_->getNumberOfLevels());
+      {%- for field in block_stream_collide['all_fields'] %}
+      {{field.name}}Pointers.resize(blocks_->getNumberOfLevels());
+      {%if target is equalto 'gpu' -%} {{field.name}}PointersGPU.resize(blocks_->getNumberOfLevels()); {% endif %}
+      {%- endfor %}
+
+      {%if target is equalto 'gpu' -%} block_.resize(blocks_->getNumberOfLevels()); {% endif %}
+      {%if target is equalto 'gpu' -%} grid_.resize(blocks_->getNumberOfLevels()); {% endif %}
+
+      glPropagationPDFs.resize(blocks_->getNumberOfLevels());
+      {%if target is equalto 'gpu' -%} glPropagationPDFsGPU.resize(blocks_->getNumberOfLevels()); {% endif %}
+
+      {%if target is equalto 'gpu' -%} glPropagationBlock_.resize(blocks_->getNumberOfLevels()); {% endif %}
+      {%if target is equalto 'gpu' -%} glPropagationGrid_.resize(blocks_->getNumberOfLevels()); {% endif %}
+
+      for( auto it = blocks_->begin(); it != blocks_->end(); ++it )
+      {
+         auto* local = dynamic_cast< Block* >(it.get());
+         {%- for field in block_stream_collide['all_fields'] %}
+         auto {{field.name}} = local->getData< {{field | field_type(is_gpu=is_gpu)}} >({{field.name}}ID);
+         {%- endfor %}
+
+         size_1 = int64_c({{block_stream_collide['all_fields'][0].name}}->xSize());
+         size_2 = int64_c({{block_stream_collide['all_fields'][0].name}}->ySize());
+         size_3 = int64_c({{block_stream_collide['all_fields'][0].name}}->zSize());
+
+         {%- for field in block_stream_collide['all_fields'] %}
+
+         stride_{{field.name}}_0 = int64_c({{field.name}}->xStride());
+         stride_{{field.name}}_1 = int64_c({{field.name}}->yStride());
+         stride_{{field.name}}_2 = int64_c({{field.name}}->zStride());
+         stride_{{field.name}}_3 = int64_c(1 * int64_c({{field.name}}->fStride()));
+
+         {%- endfor %}
+         break;
+      }
+
+      for( auto it = blocks_->begin(); it != blocks_->end(); ++it )
+      {
+         auto* local = dynamic_cast< Block* >(it.get());
+         const uint_t level = local->getLevel();
+         {%- for field in block_stream_collide['all_fields'] %}
+         auto {{field.name}} = local->getData< {{field | field_type(is_gpu=is_gpu)}} >({{field.name}}ID);
+         {{field.name}}Pointers[level].emplace_back({{field.name}}->dataAt(0, 0, 0, 0));
+         {%- endfor %}
+
+
+         for(auto dir = stencil::{{stencil_name}}::beginNoCenter(); dir != stencil::{{stencil_name}}::end(); ++dir){
+            uint_t nSecIdx = blockforest::getBlockNeighborhoodSectionIndex(*dir);
+            // Propagate on ghost layers shadowing coarse or no blocks
+            if(local->neighborhoodSectionHasLargerBlock(nSecIdx)){
+               CellInterval ci;
+               {{pdf_field.name}}->getGhostRegion(*dir, ci, 1);
+               sizeTuple dirTuple = std::make_tuple(int64_c(ci.xSize()), int64_c(ci.ySize()), int64_c(ci.zSize()));
+               glPropagationPDFs[level][dirTuple].emplace_back({{pdf_field.name}}->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0));
+            }
+         }
+      }
+
+      for (uint_t level = 0; level < blocks_->getNumberOfLevels(); level++) {
+         size_0[level] = int64_c({{pdf_field.name}}Pointers[level].size());
+
+         {%if target is equalto 'gpu' -%}
+
+         int64_t indexingX = size_1 * size_0[level];
+         int64_t indexingY = size_2;
+         int64_t indexingZ = size_3;
+
+         int64_t cudaBlockSize0 = 128;
+         int64_t cudaBlockSize1 = 1;
+         int64_t cudaBlockSize2 = 1;
+
+         block_[level] = dim3((unsigned int)((1024 < ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)) ? 1024 : ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)), (unsigned int)((1024 < ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))) ? 1024 : ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))), (unsigned int)((64 < ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))))) ? 64 : ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? 
indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))))));
+         grid_[level]  = dim3((unsigned int)(( (indexingX) % (((1024 < ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)) ? 1024 : ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))) == 0 ? (int64_t)(indexingX) / (int64_t)(((1024 < ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)) ? 1024 : ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))) : ( (int64_t)(indexingX) / (int64_t)(((1024 < ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)) ? 1024 : ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))) ) +1 )), (unsigned int)(( (indexingY) % (((1024 < ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))) ? 1024 : ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))) == 0 ? (int64_t)(indexingY) / (int64_t)(((1024 < ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))) ? 1024 : ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))) : ( (int64_t)(indexingY) / (int64_t)(((1024 < ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? 
indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))) ? 1024 : ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))) ) +1 )), (unsigned int)(( (indexingZ) % (((64 < ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))))) ? 64 : ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? 
indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))))) == 0 ? (int64_t)(indexingZ) / (int64_t)(((64 < ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))))) ? 64 : ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))))) : ( (int64_t)(indexingZ) / (int64_t)(((64 < ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? 
indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))))) ? 64 : ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))))) ) +1 )));
+
+         {%- for field in block_stream_collide['all_fields'] %}
+
+         WALBERLA_GPU_CHECK(gpuMalloc( (void**)&{{field.name}}PointersGPU[level], sizeof({{pdf_field.dtype.c_name}}* ) * {{field.name}}Pointers[level].size() ));
+         WALBERLA_GPU_CHECK(gpuMemcpy( {{field.name}}PointersGPU[level], &{{field.name}}Pointers[level][0], sizeof({{pdf_field.dtype.c_name}} *) * {{field.name}}Pointers[level].size(), gpuMemcpyHostToDevice ));
+
+         {%- endfor %}
+
+         for (auto it = glPropagationPDFs[level].begin(); it != glPropagationPDFs[level].end(); it++){
+            if(it->second.empty()){ continue;}
+
+            indexingX = std::get<0>(it->first) * int64_c(it->second.size());
+            indexingY = std::get<1>(it->first);
+            indexingZ = std::get<2>(it->first);
+
+            cudaBlockSize0 = 32;
+            cudaBlockSize1 = 1;
+            cudaBlockSize2 = 1;
+
+            glPropagationBlock_[level][it->first] = dim3((unsigned int)((1024 < ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)) ? 1024 : ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)), (unsigned int)((1024 < ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))) ? 1024 : ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))), (unsigned int)((64 < ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))))) ? 64 : ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? 
indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))))));
+            glPropagationGrid_[level][it->first]  = dim3((unsigned int)(( (indexingX) % (((1024 < ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)) ? 1024 : ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))) == 0 ? (int64_t)(indexingX) / (int64_t)(((1024 < ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)) ? 1024 : ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))) : ( (int64_t)(indexingX) / (int64_t)(((1024 < ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)) ? 1024 : ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))) ) +1 )), (unsigned int)(( (indexingY) % (((1024 < ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))) ? 1024 : ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))) == 0 ? (int64_t)(indexingY) / (int64_t)(((1024 < ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))) ? 1024 : ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))) : ( (int64_t)(indexingY) / (int64_t)(((1024 < ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? 
indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))) ? 1024 : ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))) ) +1 )), (unsigned int)(( (indexingZ) % (((64 < ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))))) ? 64 : ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? 
indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))))) == 0 ? (int64_t)(indexingZ) / (int64_t)(((64 < ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))))) ? 64 : ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))))) : ( (int64_t)(indexingZ) / (int64_t)(((64 < ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? 
indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))))) ? 64 : ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))))) ) +1 )));
+
+            WALBERLA_GPU_CHECK(gpuMalloc( (void**)&glPropagationPDFsGPU[level][it->first], sizeof({{pdf_field.dtype.c_name}}* ) * it->second.size() ))
+            WALBERLA_GPU_CHECK(gpuMemcpy( glPropagationPDFsGPU[level][it->first], &it->second[0], sizeof({{pdf_field.dtype.c_name}}* ) * it->second.size(), gpuMemcpyHostToDevice ))
+         }
+
+         {% endif %}
+      }
+      {% endif %}
+
+   };
+
+   {%if block_stream_collide -%}
+   ~{{class_name}}() { // Releases the device pointer arrays obtained with gpuMalloc; the body is empty for non-GPU targets.
+      {%if target is equalto 'gpu' -%}
+      for (uint_t level = 0; level < blocks_->getNumberOfLevels(); level++)
+      {
+         {%- for field in block_stream_collide['all_fields'] %}
+         if(!{{field.name}}Pointers[level].empty()){ // freed only when the host-side pointer list of this level is non-empty
+            WALBERLA_GPU_CHECK(gpuFree({{field.name}}PointersGPU[level]))
+         }
+         {%- endfor %}
+         // Free every device array in glPropagationPDFsGPU whose host-side list is non-empty; empty entries were never allocated.
+         for (auto it = glPropagationPDFs[level].begin(); it != glPropagationPDFs[level].end(); it++){
+            if(it->second.empty()){ continue;}
+            WALBERLA_GPU_CHECK(gpuFree(glPropagationPDFsGPU[level][it->first])) // NOTE(review): no deleted copy operations are visible here; a copied object would presumably free these twice - confirm
+         }
+      }
+      {%- endif %}
+   }
+   {% else %}
+   {{ kernel_list| generate_destructor(class_name) |indent(4) }}
+   {% endif %}
+
+
+   /*************************************************************************************
+   *                Internal Function Definitions with raw Pointer
+   *************************************************************************************/
+
+   void blockStreamCollide({{- ["uint_t level", "uint8_t timestep", ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}}); // declaration only: takes a level and a timestep, plus a stream defaulting to nullptr on GPU
+   void ghostLayerPropagation({{- ["uint_t level", "uint8_t timestep", ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}}); // declaration only: same parameter list as blockStreamCollide
+   bool blockWise() const { return blockWise_; } // read-only accessor for the blockWise_ flag; const so it can be called on const objects
+
+   {%- for kernel in kernels %}
+   static void {{kernel['function_name']}} ({{kernel['kernel']|generate_plain_parameter_list(ghost_layers=0, stream="nullptr")}}); // static declaration; parameter list generated with ghost_layers=0 and stream "nullptr"
+   static void {{kernel['function_name']}}CellInterval ({{kernel['kernel']|generate_plain_parameter_list(cell_interval='ci', stream="nullptr")}}); // static declaration; parameter list generated for a cell interval named ci
+   {% endfor %}
+
+   /*************************************************************************************
+   *                Function Definitions for external Usage
+   *************************************************************************************/
+
+   std::function<void ()> blockStreamCollideFck({{- ["uint_t level", "uint8_t timestep", ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} ) // Wraps a blockStreamCollide call with the given arguments in a nullary callable.
+   {
+      return [{{- ["this", "level", "timestep", ["stream"] if target == 'gpu' else []] | type_identifier_list -}}](){ // capture list is generated from this plus the arguments; the callable must not outlive this object
+         blockStreamCollide({{- ["level", "timestep", ["stream"] if target == 'gpu' else []] | type_identifier_list -}});
+      };
+   }
+
+   void streamCollideOverBlocks({{- ["uint_t level", "uint8_t timestep", ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} ) // Immediate counterpart of blockStreamCollideFck: forwards its arguments unchanged.
+   {
+      blockStreamCollide({{- ["level", "timestep", ["stream"] if target == 'gpu' else []] | type_identifier_list -}});
+   }
+
+
+   {%- for kernel in kernels %}
+
+   std::function<void (IBlock *)> {{kernel['function_name']}}() // Returns a callable that runs this kernel's single-block overload on the block passed to it.
+   {
+      return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", ] | type_identifier_list -}}); }; // capture list is generated from this, so the callable must not outlive this object
+   }
+
+   std::function<void (IBlock *)> {{kernel['function_name']}}({{- ["Type type", ] | type_identifier_list -}}) // Returns a callable for the requested region: INNER, OUTER, or the plain single-block sweep otherwise.
+   {
+      if (!validInnerOuterSplit_ && type != Type::ALL) // any type other than Type::ALL requires validInnerOuterSplit_ to be true
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+      // Dispatch on the requested region; each callable's capture list is generated from this, so it must not outlive the object.
+      switch (type)
+      {
+      case Type::INNER:
+         return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Inner({{- ["block", ] | type_identifier_list -}}); };
+      case Type::OUTER:
+         return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Outer({{- ["block", ] | type_identifier_list -}}); };
+      default: // Type::ALL and every other value
+         return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", ] | type_identifier_list -}}); };
+      }
+   }
+
+   std::function<void (IBlock *)> {{kernel['function_name']}}({{- ["Type type", "const cell_idx_t ghost_layers"] | type_identifier_list -}}) // Like the Type-only overload, with a ghost-layer count for the non-split sweep.
+   {
+      if (!validInnerOuterSplit_ && type != Type::ALL) // any type other than Type::ALL requires validInnerOuterSplit_ to be true
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+      // Dispatch on the requested region; each callable's capture list is generated from this, so it must not outlive the object.
+      switch (type)
+      {
+      case Type::INNER:
+         return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Inner({{- ["block", ] | type_identifier_list -}}); };
+      case Type::OUTER:
+         return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Outer({{- ["block", ] | type_identifier_list -}}); };
+      default: // only this branch forwards ghost_layers; the INNER and OUTER branches ignore it
+         return [{{- ["this", "ghost_layers"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", "ghost_layers"] | type_identifier_list -}}); };
+      }
+   }
+
+   {% if target is equalto 'gpu' -%}
+   std::function<void (IBlock *)> {{kernel['function_name']}}({{- ["Type type", "const cell_idx_t ghost_layers", "gpuStream_t gpuStream"] | type_identifier_list -}}) // GPU-only overload: like the (type, ghost_layers) overload, with a stream forwarded to the selected sweep.
+   {
+      if (!validInnerOuterSplit_ && type != Type::ALL) // any type other than Type::ALL requires validInnerOuterSplit_ to be true
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+      // Dispatch on the requested region; each callable's capture list is generated from this, so it must not outlive the object.
+      switch (type)
+      {
+      case Type::INNER:
+         return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Inner({{- ["block", "gpuStream"] | type_identifier_list -}}); };
+      case Type::OUTER:
+         return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Outer({{- ["block", "gpuStream"] | type_identifier_list -}}); };
+      default: // only this branch forwards ghost_layers; the INNER and OUTER branches ignore it
+         return [{{- ["this", "ghost_layers", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", "ghost_layers", "gpuStream"] | type_identifier_list -}}); };
+      }
+   }
+
+   std::function<void (IBlock *)> {{kernel['function_name']}}({{- ["Type type", "gpuStream_t gpuStream"] | type_identifier_list -}}) // GPU-only overload: like the Type-only overload, with a stream forwarded to the selected sweep.
+   {
+      if (!validInnerOuterSplit_ && type != Type::ALL) // any type other than Type::ALL requires validInnerOuterSplit_ to be true
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+      // Dispatch on the requested region; each callable's capture list is generated from this, so it must not outlive the object.
+      switch (type)
+      {
+      case Type::INNER:
+         return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Inner({{- ["block", "gpuStream"] | type_identifier_list -}}); };
+      case Type::OUTER:
+         return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Outer({{- ["block", "gpuStream"] | type_identifier_list -}}); };
+      default: // the non-split sweep is called with cell_idx_c(0) as its ghost-layer argument
+         return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", "cell_idx_c(0)", "gpuStream"] | type_identifier_list -}}); };
+      }
+   }
+   {%- endif %}
+
+   void {{kernel['function_name']}}({{- ["IBlock * block",] | type_identifier_list -}}) // Runs the kernel on one block with ghost_layers fixed to 0 (and a nullptr stream on GPU).
+   {
+      const cell_idx_t ghost_layers = 0;
+      {% if target is equalto 'gpu' -%}
+      gpuStream_t gpuStream = nullptr;
+      {%- endif %}
+      // Generated statements follow; the filter names suggest field extraction, parameter references, timestep advancement, the kernel call and field swaps.
+      {{kernel['kernel']|generate_block_data_to_field_extraction|indent(6)}}
+      {{kernel['kernel']|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True, parameter_registration=parameter_scaling)|indent(6)}}
+      {{kernel['kernel']|generate_timestep_advancements|indent(6)}}
+      {{kernel['function_name']}}({{kernel['kernel']|generate_function_collection_call(ghost_layers='ghost_layers')}});
+      {{kernel['kernel']|generate_swaps|indent(6)}}
+   }
+
+   void {{kernel['function_name']}}({{- ["IBlock * block", "const cell_idx_t ghost_layers"] | type_identifier_list -}}) // Runs the kernel on one block with a caller-chosen ghost-layer count.
+   { // on GPU a local nullptr stream is declared below
+      {% if target is equalto 'gpu' -%}
+      gpuStream_t gpuStream = nullptr;
+      {%- endif %}
+      // Generated statements follow; the filter names suggest field extraction, parameter references, timestep advancement, the kernel call and field swaps.
+      {{kernel['kernel']|generate_block_data_to_field_extraction|indent(6)}}
+      {{kernel['kernel']|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True, parameter_registration=parameter_scaling)|indent(6)}}
+      {{kernel['kernel']|generate_timestep_advancements|indent(6)}}
+      {{kernel['function_name']}}({{kernel['kernel']|generate_function_collection_call(ghost_layers='ghost_layers')}});
+      {{kernel['kernel']|generate_swaps|indent(6)}}
+   }
+
+   {% if target is equalto 'gpu' -%}
+   void {{kernel['function_name']}}({{- ["IBlock * block", "const cell_idx_t ghost_layers", "gpuStream_t gpuStream"] | type_identifier_list -}}) // GPU-only overload: same generated body as the two-argument overload, with the stream supplied by the caller.
+   { // NOTE(review): gpuStream is not referenced in the visible template text; presumably the generated call uses it - confirm
+      {{kernel['kernel']|generate_block_data_to_field_extraction|indent(6)}}
+      {{kernel['kernel']|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True, parameter_registration=parameter_scaling)|indent(6)}}
+      {{kernel['kernel']|generate_timestep_advancements|indent(6)}}
+      {{kernel['function_name']}}({{kernel['kernel']|generate_function_collection_call(ghost_layers='ghost_layers')}});
+      {{kernel['kernel']|generate_swaps|indent(6)}}
+   }
+   {%- endif %}
+
+   void {{kernel['function_name']}}CellInterval({{- ["IBlock * block", "const CellInterval & ci", ["gpuStream_t gpuStream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}}) // Runs the kernel on one block for the cell interval ci; on GPU the stream defaults to nullptr.
+   { // generated statements follow; the filter names suggest field extraction, parameter references, timestep advancement, the kernel call and field swaps
+      {{kernel['kernel']|generate_block_data_to_field_extraction|indent(6)}}
+      {{kernel['kernel']|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True, parameter_registration=parameter_scaling)|indent(6)}}
+      {{kernel['kernel']|generate_timestep_advancements|indent(6)}}
+      {{kernel['function_name']}}CellInterval({{kernel['kernel']|generate_function_collection_call(cell_interval='ci')}});
+      {{kernel['kernel']|generate_swaps|indent(6)}}
+   }
+
+   void {{kernel['function_name']}}Inner({{- ["IBlock * block", ["gpuStream_t gpuStream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}}) // Runs the cell-interval kernel on the interval named inner, computed below from xyzSize() and outerWidth_.
+   {
+      {{kernel['kernel']|generate_block_data_to_field_extraction|indent(6)}}
+      {{kernel['kernel']|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True, parameter_registration=parameter_scaling)|indent(6)}}
+      {{kernel['kernel']|generate_timestep_advancements(advance=False)|indent(6)}}
+      // Start from the field's xyzSize() interval; expand() with negated widths presumably shrinks it by outerWidth_ per axis - confirm.
+      CellInterval inner = {{kernel['field']}}->xyzSize();
+      inner.expand(Cell(-outerWidth_[0], -outerWidth_[1], -outerWidth_[2]));
+      // Unlike the sibling sweeps, the advancement filter is invoked with advance=False and no swap statements follow the call.
+      {{kernel['function_name']}}CellInterval({{kernel['kernel']|generate_function_collection_call(cell_interval='inner')}});
+   }
+
+   void {{kernel['function_name']}}Outer({{- ["IBlock * block", ["gpuStream_t gpuStream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}}) // Runs the cell-interval kernel once for each interval cached in layers_.
+   {
+      // Generated statements follow; the filter names suggest field extraction, parameter references and timestep advancement.
+      {{kernel['kernel']|generate_block_data_to_field_extraction|indent(6)}}
+      {{kernel['kernel']|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True, parameter_registration=parameter_scaling)|indent(6)}}
+      {{kernel['kernel']|generate_timestep_advancements|indent(6)}}
+      // layers_ is filled only while it is empty, so the intervals built from the first field seen are reused on every later call.
+      if( layers_.empty() )
+      {
+         CellInterval ci;
+         // T and B slices of thickness outerWidth_[2]
+         {{kernel['field']}}->getSliceBeforeGhostLayer(stencil::T, ci, outerWidth_[2], false);
+         layers_.push_back(ci);
+         {{kernel['field']}}->getSliceBeforeGhostLayer(stencil::B, ci, outerWidth_[2], false);
+         layers_.push_back(ci);
+         // N and S slices of thickness outerWidth_[1]; the negated z width presumably removes cells already covered by T and B - confirm
+         {{kernel['field']}}->getSliceBeforeGhostLayer(stencil::N, ci, outerWidth_[1], false);
+         ci.expand(Cell(0, 0, -outerWidth_[2]));
+         layers_.push_back(ci);
+         {{kernel['field']}}->getSliceBeforeGhostLayer(stencil::S, ci, outerWidth_[1], false);
+         ci.expand(Cell(0, 0, -outerWidth_[2]));
+         layers_.push_back(ci);
+         // E and W slices of thickness outerWidth_[0]; negated y and z widths presumably remove cells already covered above - confirm
+         {{kernel['field']}}->getSliceBeforeGhostLayer(stencil::E, ci, outerWidth_[0], false);
+         ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+         layers_.push_back(ci);
+         {{kernel['field']}}->getSliceBeforeGhostLayer(stencil::W, ci, outerWidth_[0], false);
+         ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+         layers_.push_back(ci);
+      }
+      // Run the cell-interval kernel once per cached interval: through a parallel section on GPU, in a plain loop otherwise.
+      {%if target is equalto 'gpu'%}
+      {
+         auto parallelSection_ = parallelStreams_.parallelSection( gpuStream );
+         for( auto & ci: layers_ )
+         {
+            parallelSection_.run([&]( auto s ) { // NOTE(review): s is not referenced in the visible template text - confirm whether the generated call is meant to use it
+               {{kernel['function_name']}}CellInterval({{kernel['kernel']|generate_function_collection_call(cell_interval='ci')}});
+            });
+         }
+      }
+      {% else %}
+      for( auto & ci: layers_ )
+      {
+         {{kernel['function_name']}}CellInterval({{kernel['kernel']|generate_function_collection_call(cell_interval='ci')}});
+      }
+      {% endif %}
+      // Swap statements are generated once, after all intervals have been processed; continuation lines are indented by 6 like the sibling sweeps.
+      {{kernel['kernel']|generate_swaps|indent(6)}}
+   }
+   {% endfor %}
+
+   {%if target is equalto 'gpu'%}
+   void setOuterPriority(int priority) // GPU-only: forwards priority to parallelStreams_, the member the Outer sweeps take their parallel section from.
+   {
+      parallelStreams_.setStreamPriority(priority);
+   }
+   {%endif%}
+
+ private:
+   shared_ptr< StructuredBlockForest > blocks_; // queried for the number of levels when device arrays are released
+   {{kernel_list|generate_members(parameter_registration=parameter_scaling)|indent(4)}}
+
+   Cell outerWidth_; // per-axis widths used to build the outer slices and to reduce the inner interval
+   std::vector<CellInterval> layers_; // outer-layer intervals, filled on first use by the Outer sweeps
+   bool validInnerOuterSplit_; // when false, requesting any sweep type other than Type::ALL aborts
+   bool blockWise_{false}; // value returned by blockWise()
+
+   {%if target is equalto 'gpu' -%}
+   gpu::ParallelStreams parallelStreams_;
+   {%- endif %}
+
+   {%if block_stream_collide -%}
+
+   std::vector<int64_t> size_0; // NOTE(review): presumably one entry per level - confirm against the constructor
+
+   int64_t size_1;
+   int64_t size_2;
+   int64_t size_3;
+
+   {%- for field in block_stream_collide['all_fields'] %}
+   int64_t stride_{{field.name}}_0; // four stride values are kept per field
+   int64_t stride_{{field.name}}_1;
+   int64_t stride_{{field.name}}_2;
+   int64_t stride_{{field.name}}_3;
+   {% endfor %}
+
+   {%- for field in block_stream_collide['all_fields'] %}
+   std::vector<std::vector<{{field.dtype.c_name}} *>> {{field.name}}Pointers; // host-side list of data pointers, indexed by level
+   {% endfor -%}
+
+   std::vector<std::map<sizeTuple, std::vector<{{pdf_field.dtype.c_name}} *>>> glPropagationPDFs; // per level: host-side pointer lists keyed by sizeTuple
+   {%if target is equalto 'gpu' -%}
+
+
+   {%- for field in block_stream_collide['all_fields'] %}
+   std::vector<{{field.dtype.c_name}} **> {{field.name}}PointersGPU; // per level: device array allocated with gpuMalloc and freed in the destructor
+   {% endfor -%}
+
+   std::vector<dim3> block_; // per-level dim3 values
+   std::vector<dim3> grid_; // per-level dim3 values
+
+   std::vector<std::map<sizeTuple, {{pdf_field.dtype.c_name}} **>> glPropagationPDFsGPU; // device copies of the non-empty glPropagationPDFs lists, freed in the destructor
+
+   std::vector<std::map<sizeTuple, dim3>> glPropagationBlock_; // dim3 values per level and sizeTuple, paired with glPropagationGrid_
+   std::vector<std::map<sizeTuple, dim3>> glPropagationGrid_;
+   {%- endif %}
+   {%- endif %}
+};
+
+
+} // namespace {{namespace}}
+} // namespace walberla
+
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic pop
+#endif
diff --git a/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.cpp b/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.cpp
index 92106cddf0248c93707e6881b600b6a702d27985..9558d0d2fa2e9aa2c92e05f933394adbdd2deb68 100644
--- a/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.cpp
+++ b/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.cpp
@@ -19,12 +19,45 @@
 
 #include "{{class_name}}.h"
 
+
+{% if target is equalto 'cpu' -%}
+#define FUNC_PREFIX
+{%- elif target is equalto 'gpu' -%}
+#define FUNC_PREFIX __global__
+#include "gpu/GPUWrapper.h"
+#include "gpu/GPUField.h"
+{%- endif %}
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+
 #if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
 #   pragma GCC diagnostic push
 #   pragma GCC diagnostic ignored "-Wfloat-equal"
 #   pragma GCC diagnostic ignored "-Wshadow"
 #   pragma GCC diagnostic ignored "-Wconversion"
 #   pragma GCC diagnostic ignored "-Wunused-variable"
+#   pragma GCC diagnostic ignored "-Wignored-qualifiers"
+#endif
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_INTEL )
+#pragma warning push
+#pragma warning( disable :  1599 )
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 191
+#else
+#pragma diag_suppress 191
+#endif
 #endif
 
 namespace walberla {
@@ -42,6 +75,8 @@ namespace {{namespace}} {
    {{ kernels['localCopyDirection'] | generate_definitions }}
 
    {% if nonuniform -%}
+   {{ kernels['localCopyRedistribute']    | generate_definitions }}
+   {{ kernels['localPartialCoalescence']    | generate_definitions }}
    {{ kernels['unpackRedistribute']    | generate_definitions }}
    {{ kernels['packPartialCoalescence']    | generate_definitions }}
    {{ kernels['zeroCoalescenceRegion']    | generate_definitions }}
@@ -89,7 +124,7 @@ namespace {{namespace}} {
       WALBERLA_ASSERT_EQUAL(srcInterval.zSize(), dstInterval.zSize())
 
       {{kernels['localCopyAll']
-               | generate_call(cell_interval={src_field : 'srcInterval', dst_field : 'dstInterval'}, stream='stream')
+               | generate_call(cell_interval={src_field.name : 'srcInterval', dst_field.name : 'dstInterval'}, stream='stream')
                | indent(6) }}
    }
 
@@ -115,6 +150,56 @@ namespace {{namespace}} {
       {{kernels['unpackDirection'] | generate_call(cell_interval='ci', stream='stream') | indent(6) }}
    }
 
+   void {{class_name}}::PackKernels::localCopyDirection(
+      {{- [src_field.dtype.c_name + "** _data_" + src_field.name + "_dp", dst_field.dtype.c_name + "** _data_" + dst_field.name + "_dp",
+            kernels['localCopyDirection'].kernel_selection_parameters,
+            ["gpuStream_t stream"] if is_gpu else [], "std::array<int64_t, 4>& _sizes", "std::array<int64_t, 4>& _strides"]
+          | type_identifier_list -}}
+   ) const {  // raw-pointer overload: only implemented when generated block-wise, aborts otherwise
+      {% if block_wise -%}
+      {% if target is equalto 'gpu' -%}
+      const int64_t indexingX = _sizes[0] * _sizes[1];  // x extent of the launch fuses the two leading entries of _sizes
+      const int64_t indexingY = _sizes[2];
+      const int64_t indexingZ = _sizes[3];
+      const int64_t cudaBlockSize0 = 128;  // requested threads per block along x; y and z request 1
+      const int64_t cudaBlockSize1 = 1;
+      const int64_t cudaBlockSize2 = 1;
+
+      const int64_t _size_0 = _sizes[0];
+      const int64_t _size_{{src_field.name}}_0 = _sizes[1];  // source and destination share the same sizes ...
+      const int64_t _size_{{src_field.name}}_1 = _sizes[2];
+      const int64_t _size_{{src_field.name}}_2 = _sizes[3];
+      const int64_t _size_{{dst_field.name}}_0 = _sizes[1];
+      const int64_t _size_{{dst_field.name}}_1 = _sizes[2];
+      const int64_t _size_{{dst_field.name}}_2 = _sizes[3];
+      const int64_t _stride_{{src_field.name}}_0 = _strides[0];  // ... and the same strides
+      const int64_t _stride_{{src_field.name}}_1 = _strides[1];
+      const int64_t _stride_{{src_field.name}}_2 = _strides[2];
+      const int64_t _stride_{{src_field.name}}_3 = _strides[3];
+      const int64_t _stride_{{dst_field.name}}_0 = _strides[0];
+      const int64_t _stride_{{dst_field.name}}_1 = _strides[1];
+      const int64_t _stride_{{dst_field.name}}_2 = _strides[2];
+      const int64_t _stride_{{dst_field.name}}_3 = _strides[3];
+
+      // Launch configuration: y and z get what is left of the 128-thread budget after x; block extents are clamped to 1024/1024/64 and the grid rounds each quotient up.
+      const auto minOf = [](const int64_t a, const int64_t b) { return (a < b) ? a : b; };
+      const auto ceilDiv = [](const int64_t n, const int64_t d) { return (n % d == 0) ? n / d : n / d + 1; };
+      const int64_t threadsX = minOf(indexingX, cudaBlockSize0);
+      const int64_t threadsY = minOf(indexingY, cudaBlockSize1 * (cudaBlockSize0 / threadsX));  // divides by threadsX: needs indexingX > 0
+      const int64_t threadsZ = minOf(indexingZ, cudaBlockSize2 * ((cudaBlockSize0 * cudaBlockSize1) / (threadsX * threadsY)));  // needs indexingY > 0 as well
+      const dim3 _block = dim3(static_cast<unsigned int>(minOf(1024, threadsX)), static_cast<unsigned int>(minOf(1024, threadsY)), static_cast<unsigned int>(minOf(64, threadsZ)));
+      const dim3 _grid  = dim3(static_cast<unsigned int>(ceilDiv(indexingX, _block.x)), static_cast<unsigned int>(ceilDiv(indexingY, _block.y)), static_cast<unsigned int>(ceilDiv(indexingZ, _block.z)));
+      {%- endif %}
+
+      {{kernels['localCopyDirection']
+               | generate_call(plain_kernel_call=True, stream='stream')
+               | indent(6) }}
+
+      {%else%}
+      WALBERLA_ABORT("Block wise local communication is not implemented")
+      {%- endif %}
+   }
+
    void {{class_name}}::PackKernels::localCopyDirection(
       {{- [ "PdfField_T * " + src_field.name, "CellInterval & srcInterval",
              "PdfField_T * " + dst_field.name, "CellInterval & dstInterval",
@@ -123,16 +208,42 @@ namespace {{namespace}} {
           | type_identifier_list -}}
    ) const
    {
+      {% if not block_wise -%}
       WALBERLA_ASSERT_EQUAL(srcInterval.xSize(), dstInterval.xSize())
       WALBERLA_ASSERT_EQUAL(srcInterval.ySize(), dstInterval.ySize())
       WALBERLA_ASSERT_EQUAL(srcInterval.zSize(), dstInterval.zSize())
 
       {{kernels['localCopyDirection']
-               | generate_call(cell_interval={src_field : 'srcInterval', dst_field : 'dstInterval'}, stream='stream')
+               | generate_call(cell_interval={src_field.name : 'srcInterval', dst_field.name : 'dstInterval'}, stream='stream')
                | indent(6) }}
+
+      {%else%}
+      WALBERLA_ABORT("Local communication is only implemented block wise")
+      {%- endif %}
    }
 
+
    {% if nonuniform -%}
+   void {{class_name}}::PackKernels::localCopyRedistribute(
+      {{- [  "PdfField_T * " + src_field.name, "CellInterval & srcInterval",
+             "PdfField_T * " + dst_field.name, "CellInterval & dstInterval", kernels['localCopyRedistribute'].kernel_selection_parameters,
+             ["gpuStream_t stream"] if is_gpu else []]
+          | type_identifier_list -}}
+   ) const
+   {  // forwards to the generated localCopyRedistribute kernel: source field over srcInterval, destination field over dstInterval
+      {{kernels['localCopyRedistribute'] | generate_call(cell_interval={src_field.name : 'srcInterval', dst_field.name : 'dstInterval'}, stream='stream') | indent(6) }}
+   }
+
+   void {{class_name}}::PackKernels::localPartialCoalescence(
+      {{- [  "PdfField_T * " + src_field.name, "MaskField_T * " + mask_field.name, "CellInterval & srcInterval",
+             "PdfField_T * " + dst_field.name, "CellInterval & dstInterval", kernels['localPartialCoalescence'].kernel_selection_parameters,
+             ["gpuStream_t stream"] if is_gpu else []]
+          | type_identifier_list -}}
+   ) const
+   {  // forwards to the generated localPartialCoalescence kernel: source and mask fields over srcInterval, destination field over dstInterval
+      {{kernels['localPartialCoalescence'] | generate_call(cell_interval={src_field.name : 'srcInterval', mask_field.name : 'srcInterval', dst_field.name : 'dstInterval'}, stream='stream') | indent(6) }}
+   }
+
    void {{class_name}}::PackKernels::unpackRedistribute(
       {{- [ "PdfField_T * " + dst_field.name, "CellInterval & ci",
              "unsigned char * inBuffer", kernels['unpackDirection'].kernel_selection_parameters,
diff --git a/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.h b/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.h
index ad40a55eef10c08deb66e6266753a828be9f3452..e10644b69ef614c61033434a26686019b0b1dfe3 100644
--- a/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.h
+++ b/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.h
@@ -30,21 +30,13 @@
 #include "stencil/{{stencil_name}}.h"
 #include "stencil/Directions.h"
 
-{% if target is equalto 'cpu' -%}
-#define FUNC_PREFIX
-{%- elif target is equalto 'gpu' -%}
-#define FUNC_PREFIX __global__
+{% if target is equalto 'gpu' -%}
 #include "gpu/GPUWrapper.h"
 #include "gpu/GPUField.h"
 {%- endif %}
 
-#ifdef __GNUC__
-#define RESTRICT __restrict__
-#elif _MSC_VER
-#define RESTRICT __restrict
-#else
-#define RESTRICT
-#endif
+#include <array>
+
 
 #if defined WALBERLA_CXX_COMPILER_IS_GNU || defined WALBERLA_CXX_COMPILER_IS_CLANG
 #pragma GCC diagnostic push
@@ -120,6 +112,7 @@ class {{class_name}}
       {%- endif %}
 
       static const bool inplace = {% if inplace -%} true {%- else -%} false {%- endif -%};
+      static const bool blockWise = {% if block_wise -%} true {%- else -%} false {%- endif -%};
 
       /**
       * Packs all pdfs from the given cell interval to the send buffer.
@@ -178,6 +171,13 @@ class {{class_name}}
       /** Copies data between two blocks on the same process.
         * PDFs streaming aligned with the direction dir are copied from the sending interval onto the receiving interval.
         * */
+      void localCopyDirection(
+         {{- [src_field.dtype.c_name + "** _data_" + src_field.name + "_dp", dst_field.dtype.c_name + "** _data_" + dst_field.name + "_dp",
+                kernels['localCopyDirection'].kernel_selection_parameters,
+                ["gpuStream_t stream"] if is_gpu else [], "std::array<int64_t, 4>& _sizes", "std::array<int64_t, 4>& _strides"]
+             | type_identifier_list -}}
+      ) const;  // block-wise overload: pointer arrays plus one _sizes/_strides pair used for both fields; aborts unless generated block-wise
+
       void localCopyDirection(
          {{- [ "PdfField_T * " + src_field.name, "CellInterval & srcInterval",
                 "PdfField_T * " + dst_field.name, "CellInterval & dstInterval",
@@ -186,6 +186,7 @@ class {{class_name}}
              | type_identifier_list -}}
       ) const;
 
+
       /**
        * Returns the number of bytes that will be packed from / unpacked to the cell interval
        * when using packDirection / unpackDirection
@@ -209,6 +210,26 @@ class {{class_name}}
 
       {% if nonuniform -%}
 
+      /**
+       * Local uniform redistribute: operates on srcInterval of the source field and dstInterval of the destination field; takes no buffer, and GPU builds accept an optional stream (default nullptr).
+       * */
+      void localCopyRedistribute(
+         {{- [  "PdfField_T * " + src_field.name, "CellInterval & srcInterval",
+                "PdfField_T * " + dst_field.name, "CellInterval & dstInterval", kernels['localCopyRedistribute'].kernel_selection_parameters,
+                ["gpuStream_t stream = nullptr"] if is_gpu else []]
+             | type_identifier_list -}}
+      ) const;
+
+      /**
+       * Local partial coalescence: takes a source field, a mask field and srcInterval, plus a destination field and dstInterval; takes no buffer, and GPU builds accept an optional stream (default nullptr).
+       * */
+      void localPartialCoalescence(
+         {{- [  "PdfField_T * " + src_field.name, "MaskField_T * " + mask_field.name, "CellInterval & srcInterval",
+                "PdfField_T * " + dst_field.name, "CellInterval & dstInterval", kernels['localPartialCoalescence'].kernel_selection_parameters,
+                ["gpuStream_t stream = nullptr"] if is_gpu else []]
+             | type_identifier_list -}}
+      ) const;
+
       /**
        * Unpacks and uniformly redistributes populations coming from a coarse block onto the fine grid.
        * */
diff --git a/python/lbmpy_walberla/walberla_lbm_package.py b/python/lbmpy_walberla/walberla_lbm_package.py
index ea583181f6c4165863d68a703acd341c8c41d71e..7465a45bb1a968855ac90cea8bc7337ed93d89e4 100644
--- a/python/lbmpy_walberla/walberla_lbm_package.py
+++ b/python/lbmpy_walberla/walberla_lbm_package.py
@@ -3,7 +3,6 @@ from typing import Callable, List, Dict
 from pystencils import Target, Field
 
 from lbmpy.creationfunctions import LbmCollisionRule, LBMConfig, LBMOptimisation
-from lbmpy.relaxationrates import get_shear_relaxation_rate
 
 from pystencils_walberla.cmake_integration import CodeGenerationContext
 
@@ -35,7 +34,7 @@ def generate_lbm_package(ctx: CodeGenerationContext, name: str,
                                        cpu_openmp=cpu_openmp)
 
     if nonuniform:
-        omega = get_shear_relaxation_rate(method)
+        omega = lbm_config.relaxation_rate
         refinement_scaling = RefinementScaling()
         refinement_scaling.add_standard_relaxation_rate_scaling(omega)
     else:
@@ -47,15 +46,15 @@ def generate_lbm_package(ctx: CodeGenerationContext, name: str,
                                   macroscopic_fields=macroscopic_fields,
                                   target=target, data_type=data_type,
                                   cpu_openmp=cpu_openmp, cpu_vectorize_info=cpu_vectorize_info,
-                                  max_threads=max_threads,
-                                  set_pre_collision_pdfs=set_pre_collision_pdfs,
+                                  max_threads=max_threads, set_pre_collision_pdfs=set_pre_collision_pdfs,
                                   **kernel_parameters)
 
     spatial_shape = None
     if lbm_optimisation.symbolic_field and lbm_optimisation.symbolic_field.has_fixed_shape:
         spatial_shape = lbm_optimisation.symbolic_field.spatial_shape + (lbm_config.stencil.Q, )
 
-    generate_boundary_collection(ctx, f'{name}BoundaryCollection', boundary_generators=boundaries,
-                                 lb_method=method, field_name='pdfs', spatial_shape=spatial_shape,
-                                 streaming_pattern=lbm_config.streaming_pattern,
-                                 target=target, layout=lbm_optimisation.field_layout)
+    if boundaries is not None:
+        generate_boundary_collection(ctx, f'{name}BoundaryCollection', boundary_generators=boundaries,
+                                     lb_method=method, field_name='pdfs', spatial_shape=spatial_shape,
+                                     streaming_pattern=lbm_config.streaming_pattern,
+                                     target=target, layout=lbm_optimisation.field_layout)
diff --git a/python/pystencils_walberla/__init__.py b/python/pystencils_walberla/__init__.py
index f78f7fcf244e7fd140cd2abcc93ebaebaea2f94f..158a294523f207daeed3b977408a08a2157ece72 100644
--- a/python/pystencils_walberla/__init__.py
+++ b/python/pystencils_walberla/__init__.py
@@ -1,4 +1,4 @@
-from .boundary import generate_staggered_boundary, generate_staggered_flux_boundary
+from .boundary import generate_boundary, generate_staggered_boundary, generate_staggered_flux_boundary
 from .cmake_integration import CodeGeneration, ManualCodeGenerationContext
 
 from .function_generator import function_generator
@@ -8,7 +8,7 @@ from .pack_info import (generate_pack_info, generate_pack_info_for_field,
                         generate_pack_info_from_kernel, generate_mpidtype_info_from_kernel)
 from .utility import generate_info_header, get_vectorize_instruction_set, config_from_context
 
-__all__ = ['generate_staggered_boundary', 'generate_staggered_flux_boundary',
+__all__ = ['generate_boundary', 'generate_staggered_boundary', 'generate_staggered_flux_boundary',
            'CodeGeneration', 'ManualCodeGenerationContext',
            'function_generator',
            'generate_sweep', 'generate_selective_sweep', 'generate_sweep_collection',
diff --git a/python/pystencils_walberla/boundary.py b/python/pystencils_walberla/boundary.py
index 7af79ed677697633c099a5dec78b2b9afb66a226..6e4ff76bb0414f58aa0b81da8733bd8dc85435c5 100644
--- a/python/pystencils_walberla/boundary.py
+++ b/python/pystencils_walberla/boundary.py
@@ -67,8 +67,16 @@ def generate_boundary(generation_context,
     if not kernel_creation_function:
         kernel_creation_function = create_boundary_kernel
 
-    kernel = kernel_creation_function(field, index_field, neighbor_stencil, boundary_object,
-                                      target=target, **create_kernel_params)
+    bc_force = hasattr(boundary_object, "calculate_force_on_boundary") and boundary_object.calculate_force_on_boundary
+    if bc_force:
+        force_vector_type = np.dtype([(f"F_{i}", np.float64) for i in range(dim)], align=True)
+        force_vector = Field('forceVector', FieldType.INDEXED, force_vector_type, layout=[0],
+                             shape=(TypedSymbol("forceVectorSize", create_type("int32")), 1), strides=(1, 1))
+        kernel = kernel_creation_function(field, index_field, neighbor_stencil, boundary_object,
+                                          target=target, force_vector=force_vector, **create_kernel_params)
+    else:
+        kernel = kernel_creation_function(field, index_field, neighbor_stencil, boundary_object,
+                                          target=target, **create_kernel_params)
 
     if isinstance(kernel, KernelFunction):
         kernel.function_name = f"boundary_{boundary_object.name}"
@@ -103,7 +111,8 @@ def generate_boundary(generation_context,
         'additional_data_handler': additional_data_handler,
         'dtype': "double" if is_float else "float",
         'layout': layout,
-        'index_shape': index_shape
+        'index_shape': index_shape,
+        'calculate_force': bc_force
     }
 
     env = Environment(loader=PackageLoader('pystencils_walberla'), undefined=StrictUndefined)
diff --git a/python/pystencils_walberla/jinja_filters.py b/python/pystencils_walberla/jinja_filters.py
index 6d05bf8ffd51808821311bf09f0db263570983b9..d83ed6bba4197659ae447f773092111bcf7298ec 100644
--- a/python/pystencils_walberla/jinja_filters.py
+++ b/python/pystencils_walberla/jinja_filters.py
@@ -9,27 +9,24 @@ import sympy as sp
 
 from pystencils import Target, Backend
 from pystencils.backends.cbackend import generate_c
-from pystencils.typing import TypedSymbol, get_base_type
+from pystencils.typing import TypedSymbol, get_base_type, PointerType
 from pystencils.field import FieldType
 from pystencils.sympyextensions import prod
 
 temporary_fieldPointerTemplate = """{type}"""
 
-temporary_fieldMemberTemplate = """
-private: std::set< {type} *, field::SwapableCompare< {type} * > > cache_{original_field_name}_;"""
+temporary_fieldMemberTemplate = "std::unordered_map<IBlock*, {type} *> cache_{original_field_name}_;"
 
 temporary_fieldTemplate = """
 {{
-    // Getting temporary field {tmp_field_name}
-    auto it = cache_{original_field_name}_.find( {original_field_name} );
-    if( it != cache_{original_field_name}_.end() )
+    if (cache_{original_field_name}_.find(block) == cache_{original_field_name}_.end())
     {{
-        {tmp_field_name} = *it;
+        {tmp_field_name} = {original_field_name}->cloneUninitialized();
+        cache_{original_field_name}_[block] = {tmp_field_name};
     }}
     else
     {{
-        {tmp_field_name} = {original_field_name}->cloneUninitialized();
-        cache_{original_field_name}_.insert({tmp_field_name});
+        {tmp_field_name} = cache_{original_field_name}_[block];
     }}
 }}
 """
@@ -40,7 +37,7 @@ temporary_constructor = """
 
 delete_loop = """
     for(auto p: cache_{original_field_name}_) {{
-        delete p;
+        delete p.second;
     }}
 """
 
@@ -226,17 +223,23 @@ def generate_block_data_to_field_extraction(ctx, kernel_info, parameters_to_igno
 
 
 def generate_refs_for_kernel_parameters(kernel_info, prefix, parameters_to_ignore=(), ignore_fields=False,
-                                        parameter_registration=None):
+                                        parameter_registration=None, level_known=False):
+
+    pointer_symbols = {p.symbol.name for p in kernel_info.parameters
+                       if not p.is_field_parameter and isinstance(p.symbol.dtype, PointerType)}
     symbols = {p.field_name for p in kernel_info.parameters if p.is_field_pointer and not ignore_fields}
     symbols.update(p.symbol.name for p in kernel_info.parameters if not p.is_field_parameter)
     symbols.difference_update(parameters_to_ignore)
+    if ignore_fields:
+        symbols.difference_update(pointer_symbols)
     type_information = {p.symbol.name: p.symbol.dtype for p in kernel_info.parameters if not p.is_field_parameter}
     result = []
     registered_parameters = [] if not parameter_registration else parameter_registration.scaling_info
     for s in symbols:
         if s in registered_parameters:
             dtype = type_information[s].c_name
-            result.append("const uint_t level = block->getBlockStorage().getLevel(*block);")
+            if not level_known:
+                result.append("const uint_t level = block->getBlockStorage().getLevel(*block);")
             result.append(f"{dtype} & {s} = {s}Vector[level];")
         else:
             result.append(f"auto & {s} = {prefix}{s}_;")
@@ -245,7 +248,7 @@ def generate_refs_for_kernel_parameters(kernel_info, prefix, parameters_to_ignor
 
 @jinja2_context_decorator
 def generate_call(ctx, kernel, ghost_layers_to_include=0, cell_interval=None, stream='0',
-                  spatial_shape_symbols=()):
+                  spatial_shape_symbols=(), plain_kernel_call=False):
     """Generates the function call to a pystencils kernel
 
     Args:
@@ -265,6 +268,20 @@ def generate_call(ctx, kernel, ghost_layers_to_include=0, cell_interval=None, st
                                parameters - however in special cases like boundary conditions a manual specification
                                may be necessary.
     """
+    ast_params = kernel.parameters
+    if len(spatial_shape_symbols) == 0:
+        for param in ast_params:
+            if param.is_field_parameter and FieldType.is_indexed(param.fields[0]):
+                continue
+            if param.is_field_pointer:
+                field = param.fields[0]
+                if field.has_fixed_shape:
+                    spatial_shape_symbols = field.spatial_shape
+
+    if plain_kernel_call:
+        return kernel.generate_kernel_invocation_code(plain_kernel_call=True, stream=stream,
+                                                      spatial_shape_symbols=spatial_shape_symbols)
+
     assert isinstance(ghost_layers_to_include, str) or ghost_layers_to_include >= 0
     ast_params = kernel.parameters
     vec_info = ctx.get('cpu_vectorize_info', None)
@@ -296,7 +313,7 @@ def generate_call(ctx, kernel, ghost_layers_to_include=0, cell_interval=None, st
         if isinstance(cell_interval, str):
             return cell_interval
         elif isinstance(cell_interval, dict):
-            return cell_interval[field_object]
+            return cell_interval[field_object.name]
         else:
             return None
 
@@ -591,7 +608,8 @@ def generate_members(ctx, kernel_infos, parameters_to_ignore=None, only_fields=F
             original_field_name = field_name[:-len('_tmp')]
             f_size = get_field_fsize(f)
             field_type = make_field_type(get_base_type(f.dtype), f_size, is_gpu)
-            result.append(temporary_fieldMemberTemplate.format(type=field_type, original_field_name=original_field_name))
+            result.append(temporary_fieldMemberTemplate.format(type=field_type,
+                                                               original_field_name=original_field_name))
 
     for kernel_info in kernel_infos:
         if hasattr(kernel_info, 'varying_parameters'):
@@ -734,6 +752,16 @@ def generate_constructor(ctx, kernel_infos, parameter_registration):
     return "\n".join(result)
 
 
+@jinja2_context_decorator
+def generate_field_strides(ctx, kernel_info):
+    """Return C++ lines ``const <type> <name> = <name minus its first character>;``, one per stride parameter.
+
+    Only parameters of ``kernel_info`` flagged ``is_field_stride`` contribute; ``ctx`` is not used here.
+    """
+    strides = (p.symbol for p in kernel_info.parameters if p.is_field_stride)
+    return "\n".join(f"const {sym.dtype.c_name} {sym.name} = {sym.name[1:]};" for sym in strides)
+
+
 def generate_list_of_expressions(expressions, prepend=''):
     if len(expressions) == 0:
         return ''
@@ -806,3 +834,4 @@ def add_pystencils_filters_to_jinja_env(jinja_env):
     jinja_env.filters['identifier_list'] = identifier_list
     jinja_env.filters['list_of_expressions'] = generate_list_of_expressions
     jinja_env.filters['field_type'] = field_type
+    jinja_env.filters['generate_field_strides'] = generate_field_strides
diff --git a/python/pystencils_walberla/kernel_info.py b/python/pystencils_walberla/kernel_info.py
index 586c05abe21dcc07e5920ba8cc759120089a4e4e..698b0511fe83cdf73dfec09d2d28a7d7a4e7ce95 100644
--- a/python/pystencils_walberla/kernel_info.py
+++ b/python/pystencils_walberla/kernel_info.py
@@ -32,13 +32,20 @@ class KernelInfo:
         all_headers = [list(get_headers(self.ast))]
         return reduce(merge_sorted_lists, all_headers)
 
-    def generate_kernel_invocation_code(self, **kwargs):
+    def generate_kernel_invocation_code(self, plain_kernel_call=False, **kwargs):
         ast = self.ast
         ast_params = self.parameters
         fnc_name = ast.function_name
         is_cpu = self.ast.target == Target.CPU
         call_parameters = ", ".join([p.symbol.name for p in ast_params])
 
+        if plain_kernel_call:
+            if is_cpu:
+                return f"internal_{fnc_name}::{fnc_name}({call_parameters});"
+            else:
+                stream = kwargs.get('stream', '0')
+                return f"internal_{fnc_name}::{fnc_name}<<<_grid, _block, 0, {stream}>>>({call_parameters});"
+
         if not is_cpu:
             stream = kwargs.get('stream', '0')
             spatial_shape_symbols = kwargs.get('spatial_shape_symbols', ())
diff --git a/python/pystencils_walberla/kernel_selection.py b/python/pystencils_walberla/kernel_selection.py
index ad8a99867e0970b102409823f6a17258983bae2b..3195624f6bb82a665738d3ba00950f3b3eab2978 100644
--- a/python/pystencils_walberla/kernel_selection.py
+++ b/python/pystencils_walberla/kernel_selection.py
@@ -157,6 +157,28 @@ class SwitchNode(AbstractKernelSelectionNode):
         return switch_code
 
 
+class AbortNode(AbstractKernelSelectionNode):
+    """Selection node that emits a ``WALBERLA_ABORT`` with a fixed message instead of a kernel call.
+
+    It contributes neither selection parameters nor kernel calls.
+    """
+
+    def __init__(self, message):
+        self.message = message
+
+    @property
+    def selection_parameters(self):
+        return set()
+
+    def collect_kernel_calls(self):
+        return set()
+
+    def get_code(self, **kwargs):
+        # Escape backslashes, quotes and newlines so the message stays one valid C++ string literal.
+        escaped = str(self.message).replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')
+        return f'WALBERLA_ABORT("{escaped}")'
+
+
 class KernelCallNode(AbstractKernelSelectionNode):
     def __init__(self, ast):
         self.ast = ast
@@ -169,13 +184,20 @@ class KernelCallNode(AbstractKernelSelectionNode):
     def collect_kernel_calls(self):
         return {self}
 
-    def get_code(self, **kwargs):
+    def get_code(self, plain_kernel_call=False, **kwargs):
         ast = self.ast
         ast_params = self.parameters
         fnc_name = ast.function_name
         is_cpu = self.ast.target == Target.CPU
         call_parameters = ", ".join([p.symbol.name for p in ast_params])
 
+        if plain_kernel_call:
+            if is_cpu:
+                return f"internal_{fnc_name}::{fnc_name}({call_parameters});"
+            else:
+                stream = kwargs.get('stream', '0')
+                return f"internal_{fnc_name}::{fnc_name}<<<_grid, _block, 0, {stream}>>>({call_parameters});"
+
         if not is_cpu:
             stream = kwargs.get('stream', '0')
             spatial_shape_symbols = kwargs.get('spatial_shape_symbols', ())
diff --git a/python/pystencils_walberla/templates/Boundary.tmpl.cpp b/python/pystencils_walberla/templates/Boundary.tmpl.cpp
index 644202ba67cd574724e46ef2b42e60535dc2e5c6..3ece9a917506feae344265e58c61171c8866f1cd 100644
--- a/python/pystencils_walberla/templates/Boundary.tmpl.cpp
+++ b/python/pystencils_walberla/templates/Boundary.tmpl.cpp
@@ -82,10 +82,23 @@ void {{class_name}}::run_impl(
 
    uint8_t * _data_indexVector = reinterpret_cast<uint8_t*>(pointer);
 
-   {{kernel|generate_block_data_to_field_extraction(['indexVector', 'indexVectorSize'])|indent(4)}}
+   {% if calculate_force -%}
+   auto * forceVector = block->getData<ForceVector>(forceVectorID);
+   WALBERLA_ASSERT_EQUAL(indexVectorSize, int32_c( forceVector->forceVector().size() ))
+
+   {% if target == 'gpu' -%}
+   auto forcePointer = forceVector->pointerGpu();
+   int32_t forceVectorSize = int32_c( forceVector->forceVector().size() );
+   {% else %}
+   auto forcePointer = forceVector->pointerCpu();
+   {% endif %}
+   uint8_t * _data_forceVector = reinterpret_cast<uint8_t*>(forcePointer);
+   {%- endif %}
+
+   {{kernel|generate_block_data_to_field_extraction(['indexVector', 'indexVectorSize', 'forceVector', 'forceVectorSize'])|indent(4)}}
    {{kernel|generate_timestep_advancements|indent(4)}}
-   {{kernel|generate_refs_for_kernel_parameters(prefix='', parameters_to_ignore=['indexVectorSize'], ignore_fields=True)|indent(4) }}
-   {{kernel|generate_call(spatial_shape_symbols=['indexVectorSize'], stream='stream')|indent(4)}}
+   {{kernel|generate_refs_for_kernel_parameters(prefix='', parameters_to_ignore=['indexVectorSize', 'forceVectorSize'], ignore_fields=True)|indent(4) }}
+   {{kernel|generate_call(spatial_shape_symbols=['indexVectorSize', 'forceVectorSize'], stream='stream')|indent(4)}}
 }
 
 void {{class_name}}::run(
diff --git a/python/pystencils_walberla/templates/Boundary.tmpl.h b/python/pystencils_walberla/templates/Boundary.tmpl.h
index 704a72274d802f3afe388bf992ba802b19afcc9b..43fd409eaaa9ba80cfb79bfc45016a5e02f13efb 100644
--- a/python/pystencils_walberla/templates/Boundary.tmpl.h
+++ b/python/pystencils_walberla/templates/Boundary.tmpl.h
@@ -110,6 +110,9 @@ public:
             {%- endif %}
         }
 
+
+
+
     private:
         std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
 
@@ -119,12 +122,104 @@ public:
         {%- endif %}
     };
 
+    {% if calculate_force -%}
+
+    // One force entry with components F_0, F_1, F_2; default-constructed to zero.
+    struct ForceStruct {
+       double F_0;
+       double F_1;
+       double F_2;
+       ForceStruct() : F_0(double_c(0.0)), F_1(double_c(0.0)), F_2(double_c(0.0)) {}
+       bool operator==(const ForceStruct & o) const {
+          return floatIsEqual(F_0, o.F_0) && floatIsEqual(F_1, o.F_1) && floatIsEqual(F_2, o.F_2);
+       }
+    };
+
+    // Block data holding one ForceStruct per entry on the host and, for the GPU target,
+    // a single device copy of the same array (pointer kept in gpuVector_[0]).
+    class ForceVector
+    {
+     public:
+       ForceVector() = default;
+       bool operator==(ForceVector const &other) const { return other.cpuVector_ == cpuVector_; }
+
+       {% if target == 'gpu' -%}
+       // NOTE(review): the class stays copyable although it frees a device pointer here - confirm it is never copied.
+       ~ForceVector() {if(!gpuVector_.empty()){WALBERLA_GPU_CHECK(gpuFree( gpuVector_[0] ))}}
+       {% endif -%}
+
+       std::vector<ForceStruct> & forceVector() { return cpuVector_; }
+       ForceStruct * pointerCpu()  { return cpuVector_.data(); }
+       bool empty() const {return cpuVector_.empty();}
+
+       {% if target == 'gpu' -%}
+       // Device buffer, or nullptr while syncGPU() has not uploaded anything.
+       ForceStruct * pointerGpu()  { return gpuVector_.empty() ? nullptr : gpuVector_[0]; }
+       {% endif -%}
+
+       // Returns the component-wise sum over all entries; calls syncCPU() first.
+       Vector3<double> getForce()
+       {
+          syncCPU();
+          Vector3<double> result(double_c(0.0));
+          for(const ForceStruct & entry : cpuVector_)
+          {
+             result[0] += entry.F_0;
+             result[1] += entry.F_1;
+             result[2] += entry.F_2;
+          }
+          return result;
+       }
+
+       // GPU target: releases any previous device buffer and uploads the current host entries. No-op otherwise.
+       void syncGPU()
+       {
+          {% if target == 'gpu' -%}
+          if(!gpuVector_.empty())
+          {
+             WALBERLA_GPU_CHECK(gpuFree( gpuVector_[0] ))
+             // Forget the freed pointer so neither the destructor nor a later sync frees it again.
+             gpuVector_.clear();
+          }
+          if(!cpuVector_.empty())
+          {
+             // All entries live in one device allocation; only slot 0 is ever used.
+             gpuVector_.resize(1);
+             WALBERLA_GPU_CHECK(gpuMalloc(&gpuVector_[0], sizeof(ForceStruct) * cpuVector_.size()))
+             WALBERLA_GPU_CHECK(gpuMemcpy(gpuVector_[0], &cpuVector_[0], sizeof(ForceStruct) * cpuVector_.size(), gpuMemcpyHostToDevice))
+          }
+          {%- endif %}
+       }
+
+       // GPU target: copies the device entries back into the host vector. No-op otherwise.
+       void syncCPU()
+       {
+          {% if target == 'gpu' -%}
+          // Without a device buffer or host entries there is nothing to copy (and nothing valid to index).
+          if(gpuVector_.empty() || cpuVector_.empty()) { return; }
+          WALBERLA_GPU_CHECK(gpuMemcpy( &cpuVector_[0], gpuVector_[0] , sizeof(ForceStruct) * cpuVector_.size(), gpuMemcpyDeviceToHost ))
+          {%- endif %}
+       }
+
+     private:
+       std::vector<ForceStruct> cpuVector_;
+       {% if target == 'gpu' -%}
+       std::vector<ForceStruct *> gpuVector_;
+       {%- endif %}
+    };
+
+    {%- endif %}
+
     {{class_name}}( const shared_ptr<StructuredBlockForest> & blocks,
-                   {{kernel|generate_constructor_parameters(['indexVector', 'indexVectorSize'])}}{{additional_data_handler.constructor_arguments}})
-        :{{additional_data_handler.initialiser_list}} {{ kernel|generate_constructor_initializer_list(['indexVector', 'indexVectorSize']) }}
+                   {{kernel|generate_constructor_parameters(['indexVector', 'indexVectorSize', 'forceVector', 'forceVectorSize'])}}{{additional_data_handler.constructor_arguments}})
+        :{{additional_data_handler.initialiser_list}} {{ kernel|generate_constructor_initializer_list(['indexVector', 'indexVectorSize', 'forceVector', 'forceVectorSize']) }}
     {
         auto createIdxVector = []( IBlock * const , StructuredBlockStorage * const ) { return new IndexVectors(); };
         indexVectorID = blocks->addStructuredBlockData< IndexVectors >( createIdxVector, "IndexField_{{class_name}}");
+        {% if calculate_force -%}
+        auto createForceVector = []( IBlock * const , StructuredBlockStorage * const ) { return new ForceVector(); };
+        forceVectorID = blocks->addStructuredBlockData< ForceVector >( createForceVector, "forceVector_{{class_name}}");
+        {%- endif %}
     };
 
     void run (
@@ -148,6 +227,19 @@ public:
         {{- ["IBlock * block", kernel.kernel_selection_parameters, ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}}
     );
 
+    Vector3<double> getForce(IBlock * {% if calculate_force -%}block{%else%}/*block*/{%- endif %}) // Summed force of this block's ForceVector; aborts if generated without calculate_force.
+    {
+       {% if calculate_force -%}
+       auto * forceVector = block->getData<ForceVector>(forceVectorID);
+       if(forceVector->empty()) // no entries stored for this block: report a zero force
+          return Vector3<double>(double_c(0.0));
+       return forceVector->getForce(); // component-wise sum over all entries (syncCPU() runs first)
+       {% else %}
+       WALBERLA_ABORT("Boundary condition was not generated including force calculation.")
+       return Vector3<double>(double_c(0.0));
+       {%- endif %}
+    }
+
     std::function<void (IBlock *)> getSweep( {{- [interface_spec.high_level_args, ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} )
     {
         return [ {{- ["this", interface_spec.high_level_args, ["stream"] if target == 'gpu' else []] | identifier_list -}} ]
@@ -186,6 +278,9 @@ public:
         auto & indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
         auto & indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
         auto & indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+        {% if calculate_force -%}
+        auto * forceVector = block->getData< ForceVector > ( forceVectorID );
+        {%- endif %}
 
         auto * flagField = block->getData< FlagField_T > ( flagFieldID );
         {{additional_data_handler.additional_field_data|indent(4)}}
@@ -300,6 +395,10 @@ public:
         {% endif %}
 
         indexVectors->syncGPU();
+        {% if calculate_force -%}
+        forceVector->forceVector().resize(indexVectorAll.size());
+        forceVector->syncGPU();
+        {%- endif %}
     }
 
 private:
@@ -310,9 +409,12 @@ private:
    );
 
     BlockDataID indexVectorID;
+    {% if calculate_force -%}
+    BlockDataID forceVectorID;
+    {%- endif %}
     {{additional_data_handler.additional_member_variable|indent(4)}}
 public:
-    {{kernel|generate_members(('indexVector', 'indexVectorSize'))|indent(4)}}
+    {{kernel|generate_members(('indexVector', 'indexVectorSize', 'forceVector', 'forceVectorSize'))|indent(4)}}
 };
 
 
diff --git a/src/blockforest/AABBRefinementSelection.h b/src/blockforest/AABBRefinementSelection.h
index 45847dca596dc84415629404a8ac622684a3e8ec..f92b062d52fea38c889356d80e7f2e66e3ddffa3 100644
--- a/src/blockforest/AABBRefinementSelection.h
+++ b/src/blockforest/AABBRefinementSelection.h
@@ -46,7 +46,7 @@ public:
    {
       if( configBlock )
       {
-         auto refinementBlock = configBlock.getBlock( "AABBRefinementSelection" );
+         auto refinementBlock = configBlock.getKey() == "AABBRefinementSelection" ? configBlock : configBlock.getBlock( "AABBRefinementSelection" );
 
          if( refinementBlock )
          {
diff --git a/src/blockforest/communication/UniformBufferedScheme.h b/src/blockforest/communication/UniformBufferedScheme.h
index 1693d0d687e6700c1d6bd0bfcb6dc433be59d192..e7cbd6c631ad0751f486ab4071aee310d58ef06f 100644
--- a/src/blockforest/communication/UniformBufferedScheme.h
+++ b/src/blockforest/communication/UniformBufferedScheme.h
@@ -158,9 +158,9 @@ public:
    void startCommunication();
    void wait();
 
+   std::function<void()> getCommunicateFunctor();
    std::function<void()> getStartCommunicateFunctor();
    std::function<void()> getWaitFunctor();
-   std::function<void()> getCommunicateFunctor();
    //@}
    //*******************************************************************************************************************
 
@@ -535,21 +535,21 @@ void UniformBufferedScheme<Stencil>::localBufferUnpacking( const uint_t index, c
 }
 
 template< typename Stencil >
-std::function<void()> UniformBufferedScheme<Stencil>::getStartCommunicateFunctor()
+std::function<void()> UniformBufferedScheme<Stencil>::getCommunicateFunctor()
 {
-   return std::bind( &UniformBufferedScheme::startCommunication, this );
+   return std::bind( &UniformBufferedScheme::communicate, this );
 }
 
 template< typename Stencil >
-std::function<void()> UniformBufferedScheme<Stencil>::getWaitFunctor()
+std::function<void()> UniformBufferedScheme<Stencil>::getStartCommunicateFunctor()
 {
-   return std::bind( &UniformBufferedScheme::wait, this );
+   return std::bind( &UniformBufferedScheme::startCommunication, this );
 }
 
 template< typename Stencil >
-std::function<void()> UniformBufferedScheme<Stencil>::getCommunicateFunctor()
+std::function<void()> UniformBufferedScheme<Stencil>::getWaitFunctor()
 {
-   return std::bind( &UniformBufferedScheme::communicate, this );
+   return std::bind( &UniformBufferedScheme::wait, this );
 }
 
 } // namespace communication
diff --git a/src/gpu/communication/GeneratedNonUniformGPUPackInfo.h b/src/gpu/communication/GeneratedNonUniformGPUPackInfo.h
index a5dc8d8cfccf70decacc7b33ecb5318e0490b933..de398e65491f1efa7e65c590f57f1fee8c950e0e 100644
--- a/src/gpu/communication/GeneratedNonUniformGPUPackInfo.h
+++ b/src/gpu/communication/GeneratedNonUniformGPUPackInfo.h
@@ -46,6 +46,8 @@ class GeneratedNonUniformGPUPackInfo
 
    inline void packDataEqualLevel( const Block * sender, stencil::Direction dir, GpuBuffer_T & buffer, gpuStream_t stream = nullptr) const;
    virtual void unpackDataEqualLevel( Block * receiver, stencil::Direction dir, GpuBuffer_T & buffer, gpuStream_t stream) = 0;
+   virtual void addForLocalEqualLevelComm(const Block* sender, Block* receiver, stencil::Direction dir) = 0;
+   virtual void communicateLocalEqualLevel(uint64_t level, uint8_t timestep, gpuStream_t stream) = 0;
    virtual void communicateLocalEqualLevel( const Block * sender, Block * receiver, stencil::Direction dir, gpuStream_t stream) = 0;
 
    inline  void packDataCoarseToFine        ( const Block * coarseSender, const BlockID & fineReceiver, stencil::Direction dir, GpuBuffer_T & buffer, gpuStream_t stream = nullptr) const;
@@ -63,6 +65,7 @@ class GeneratedNonUniformGPUPackInfo
    virtual uint_t sizeCoarseToFineReceive ( Block* fineReceiver, stencil::Direction dir) = 0;
    virtual uint_t sizeFineToCoarseSend ( const Block * fineSender, stencil::Direction dir) = 0;
 
+   virtual void sync() = 0;
 
 #ifndef NDEBUG
    void clearBufferSizeCheckMap() { bufferSize_.clear(); }
diff --git a/src/gpu/communication/NonUniformGPUScheme.h b/src/gpu/communication/NonUniformGPUScheme.h
index b872be1d0c80e3537971b49434d64033373a1822..ddd9fafe971e809e476595f5b67caac7340623d6 100644
--- a/src/gpu/communication/NonUniformGPUScheme.h
+++ b/src/gpu/communication/NonUniformGPUScheme.h
@@ -90,6 +90,16 @@ class NonUniformGPUScheme
    inline void waitCommunicateCoarseToFine(uint_t fineLevel);
    inline void waitCommunicateFineToCoarse(uint_t fineLevel);
 
+   /// Stores the timestep value that startCommunicationEqualLevel() passes to the
+   /// pack infos' communicateLocalEqualLevel( level, timestep, stream ) for \p level.
+   /// Aborts if \p level lies outside the per-level table, which refresh() resizes
+   /// to the number of levels of the block forest.
+   inline void setTimestepForLevel(uint_t level, uint8_t timestep)
+   {
+      WALBERLA_CHECK_LESS(level, timestepPerLevel_.size(), "Trying to set the timestep of a level that does not exist")
+      timestepPerLevel_[level] = timestep;
+   }
+
  private:
    void setupCommunication();
 
@@ -128,6 +130,7 @@ class NonUniformGPUScheme
    Set< SUID > incompatibleBlockSelectors_;
 
    gpuStream_t streams_[Stencil::Q];
+   std::vector< uint8_t > timestepPerLevel_;
 };
 
 template< typename Stencil >
@@ -196,6 +199,7 @@ void NonUniformGPUScheme< Stencil >::refresh()
    WALBERLA_CHECK_NOT_NULLPTR(forest,
                               "Trying to access communication for a block storage object that doesn't exist anymore")
    const uint_t levels = forest->getNumberOfLevels();
+   timestepPerLevel_.resize(levels);
 
    for (uint_t i = 0; i != 3; ++i)
    {
@@ -313,6 +317,13 @@ void NonUniformGPUScheme< Stencil >::startCommunicationEqualLevel(const uint_t i
       for (auto it : headers_[EQUAL_LEVEL][index])
          bufferSystemGPU_[EQUAL_LEVEL][index].sendBuffer(it.first).clear();
 
+   // If localCommunication is generated blockwise it is executed here.
+   for (auto level : participatingLevels){
+      for (auto& pi : packInfos_){
+         pi->communicateLocalEqualLevel(level, timestepPerLevel_[level], streams_[0]);
+      }
+   }
+
    // Start filling send buffers
    for (auto& iBlock : *forest)
    {
@@ -369,13 +380,12 @@ void NonUniformGPUScheme< Stencil >::startCommunicationEqualLevel(const uint_t i
          }
       }
    }
+
    // wait for packing to finish
-   for (uint_t i = 0; i < Stencil::Q; ++i)
-   {
+   for (uint_t i = 0; i < Stencil::Q; ++i){
       WALBERLA_GPU_CHECK(gpuStreamSynchronize(streams_[i]))
    }
 
-
    if (sendFromGPU_)
       bufferSystemGPU_[EQUAL_LEVEL][index].sendAll();
    else
@@ -836,8 +846,17 @@ void NonUniformGPUScheme< Stencil >::setupCommunication()
             if (!selectable::isSetSelected(block->getNeighborState(neighborIdx, uint_t(0)), requiredBlockSelectors_,
                                            incompatibleBlockSelectors_))
                continue;
-            if( block->neighborExistsLocally( neighborIdx, uint_t(0) ) )
+
+            if( block->neighborExistsLocally( neighborIdx, uint_t(0) ) ){
+               // Neighbor lives on this process: register the sender/receiver pair with every pack info and skip the buffer setup below.
+               auto receiverBlock = dynamic_cast< Block * >( forest->getBlock( block->getNeighborId( neighborIdx, uint_t(0) )) );
+               // dynamic_cast yields nullptr on failure; abort here instead of handing a null receiver to the pack infos.
+               WALBERLA_CHECK_NOT_NULLPTR(receiverBlock, "Locally existing neighbor block could not be retrieved from the block forest")
+               for (auto& pi : packInfos_){
+                  pi->addForLocalEqualLevelComm(block, receiverBlock, *dir);
+               }
                continue;
+            }
 
             const BlockID& receiverId = block->getNeighborId(neighborIdx, uint_t(0));
             auto nProcess             = mpi::MPIRank(block->getNeighborProcess(neighborIdx, uint_t(0)));
@@ -915,6 +931,10 @@ void NonUniformGPUScheme< Stencil >::setupCommunication()
       }
    }
 
+   for (auto& pi : packInfos_){
+      pi->sync();
+   }
+
    for (uint_t i = 0; i != 3; ++i)
    {
       for (uint_t j = 0; j <= levels; ++j)
diff --git a/src/lbm_generated/boundary/D3Q19BoundaryCollection.h b/src/lbm_generated/boundary/D3Q19BoundaryCollection.h
index eb1a23fb52be36ec0471bf05989512724acdc477..cca616539d470e81f5a8c0e5319d8696f89d9b5f 100644
--- a/src/lbm_generated/boundary/D3Q19BoundaryCollection.h
+++ b/src/lbm_generated/boundary/D3Q19BoundaryCollection.h
@@ -22,6 +22,8 @@
 #include "core/DataTypes.h"
 #include "domain_decomposition/IBlock.h"
 
+
+
 #include "OutflowD3Q19.h"
 #include "FixedDensityD3Q19.h"
 #include "FreeSlipD3Q19.h"
@@ -43,11 +45,11 @@ class D3Q19BoundaryCollection
    D3Q19BoundaryCollection(const shared_ptr<StructuredBlockForest> & blocks, BlockDataID flagID_, BlockDataID pdfsID_, FlagUID domainUID_, double density, double u_x, double u_y, double u_z)
       : blocks_(blocks), flagID(flagID_), pdfsID(pdfsID_), domainUID(domainUID_)
    {
-      OutflowD3Q19Object = std::make_shared< lbm::OutflowD3Q19 >(blocks, pdfsID);
-      FixedDensityD3Q19Object = std::make_shared< lbm::FixedDensityD3Q19 >(blocks, pdfsID, density);
-      FreeSlipD3Q19Object = std::make_shared< lbm::FreeSlipD3Q19 >(blocks, pdfsID);
-      NoSlipD3Q19Object = std::make_shared< lbm::NoSlipD3Q19 >(blocks, pdfsID);
-      UBBD3Q19Object = std::make_shared< lbm::UBBD3Q19 >(blocks, pdfsID, u_x, u_y, u_z);
+      OutflowD3Q19Object = std::make_shared< lbm::OutflowD3Q19 >(blocks, pdfsID_);
+      FixedDensityD3Q19Object = std::make_shared< lbm::FixedDensityD3Q19 >(blocks, pdfsID_, density);
+      FreeSlipD3Q19Object = std::make_shared< lbm::FreeSlipD3Q19 >(blocks, pdfsID_);
+      NoSlipD3Q19Object = std::make_shared< lbm::NoSlipD3Q19 >(blocks, pdfsID_);
+      UBBD3Q19Object = std::make_shared< lbm::UBBD3Q19 >(blocks, pdfsID_, u_x, u_y, u_z);
       
 
       OutflowD3Q19Object->fillFromFlagField<FlagField_T>(blocks, flagID, walberla::FlagUID("Outflow"), domainUID);
@@ -120,4 +122,4 @@ class D3Q19BoundaryCollection
 };
 
 }
-}
+}
\ No newline at end of file
diff --git a/src/lbm_generated/boundary/D3Q27BoundaryCollection.h b/src/lbm_generated/boundary/D3Q27BoundaryCollection.h
index 3428689bda22764cf3552e641d4c1f2656bab37a..cb7a039d086ad03d28cc306b1ec4e02ac77cdd77 100644
--- a/src/lbm_generated/boundary/D3Q27BoundaryCollection.h
+++ b/src/lbm_generated/boundary/D3Q27BoundaryCollection.h
@@ -22,6 +22,8 @@
 #include "core/DataTypes.h"
 #include "domain_decomposition/IBlock.h"
 
+
+
 #include "OutflowD3Q27.h"
 #include "FixedDensityD3Q27.h"
 #include "FreeSlipD3Q27.h"
@@ -43,11 +45,11 @@ class D3Q27BoundaryCollection
    D3Q27BoundaryCollection(const shared_ptr<StructuredBlockForest> & blocks, BlockDataID flagID_, BlockDataID pdfsID_, FlagUID domainUID_, double density, double u_x, double u_y, double u_z)
       : blocks_(blocks), flagID(flagID_), pdfsID(pdfsID_), domainUID(domainUID_)
    {
-      OutflowD3Q27Object = std::make_shared< lbm::OutflowD3Q27 >(blocks, pdfsID);
-      FixedDensityD3Q27Object = std::make_shared< lbm::FixedDensityD3Q27 >(blocks, pdfsID, density);
-      FreeSlipD3Q27Object = std::make_shared< lbm::FreeSlipD3Q27 >(blocks, pdfsID);
-      NoSlipD3Q27Object = std::make_shared< lbm::NoSlipD3Q27 >(blocks, pdfsID);
-      UBBD3Q27Object = std::make_shared< lbm::UBBD3Q27 >(blocks, pdfsID, u_x, u_y, u_z);
+      OutflowD3Q27Object = std::make_shared< lbm::OutflowD3Q27 >(blocks, pdfsID_);
+      FixedDensityD3Q27Object = std::make_shared< lbm::FixedDensityD3Q27 >(blocks, pdfsID_, density);
+      FreeSlipD3Q27Object = std::make_shared< lbm::FreeSlipD3Q27 >(blocks, pdfsID_);
+      NoSlipD3Q27Object = std::make_shared< lbm::NoSlipD3Q27 >(blocks, pdfsID_);
+      UBBD3Q27Object = std::make_shared< lbm::UBBD3Q27 >(blocks, pdfsID_, u_x, u_y, u_z);
       
 
       OutflowD3Q27Object->fillFromFlagField<FlagField_T>(blocks, flagID, walberla::FlagUID("Outflow"), domainUID);
@@ -120,4 +122,4 @@ class D3Q27BoundaryCollection
 };
 
 }
-}
+}
\ No newline at end of file
diff --git a/src/lbm_generated/boundary/FixedDensityD3Q19.cpp b/src/lbm_generated/boundary/FixedDensityD3Q19.cpp
index e449704f5a0bfa4932344fef2a8cab378770592f..9bfdd22eea1f6d7ce87598d7ea4bb5d11d52b7f4 100644
--- a/src/lbm_generated/boundary/FixedDensityD3Q19.cpp
+++ b/src/lbm_generated/boundary/FixedDensityD3Q19.cpp
@@ -45,9 +45,9 @@ namespace lbm {
 #pragma diag_suppress 177
 #endif
 #endif
-
+//NOLINTBEGIN(readability-non-const-parameter*)
 namespace internal_fixeddensityd3q19_even {
-static FUNC_PREFIX void fixeddensityd3q19_even(const uint8_t * RESTRICT const _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, double density, int32_t indexVectorSize)
+static FUNC_PREFIX void fixeddensityd3q19_even(uint8_t * RESTRICT const _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, double density, int32_t indexVectorSize)
 {
    
    const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13 }; 
@@ -59,29 +59,29 @@ static FUNC_PREFIX void fixeddensityd3q19_even(const uint8_t * RESTRICT const _d
    const double delta_rho = rho - 1.0;
    for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1)
    {
-      const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0]));
-      const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4]));
-      const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8]));
-      const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12]));
+      const int32_t x = *((int32_t *  )(& _data_indexVector[16*ctr_0]));
+      const int32_t y = *((int32_t *  )(& _data_indexVector[16*ctr_0 + 4]));
+      const int32_t z = *((int32_t *  )(& _data_indexVector[16*ctr_0 + 8]));
+      const int32_t dir = *((int32_t *  )(& _data_indexVector[16*ctr_0 + 12]));
       const double vel0Term = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 10*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 14*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 18*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 4*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 8*_stride_pdfs_3];
       const double vel1Term = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 15*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 7*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3];
       const double vel2Term = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 12*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 13*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 5*_stride_pdfs_3];
-      const double u_0 = vel0Term - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 13*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 17*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 3*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 7*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 9*_stride_pdfs_3];
-      const double u_1 = vel1Term - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 10*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 12*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 16*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 2*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 9*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 8*_stride_pdfs_3];
-      const double u_2 = vel2Term - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 15*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 16*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 17*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 18*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 6*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 14*_stride_pdfs_3];
-      const double u0Mu1 = u_0 + u_1*-1.0;
+      const double u_0 = vel0Term - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 13*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 3*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 7*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 9*_stride_pdfs_3];
+      const double u_1 = vel1Term - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 10*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 12*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 16*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 2*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 8*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 9*_stride_pdfs_3];
+      const double u_2 = vel2Term + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 14*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 15*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 16*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 18*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 6*_stride_pdfs_3];
+      const double u0Mu1 = u_0 - u_1;
       const double u0Pu1 = u_0 + u_1;
       const double u1Pu2 = u_1 + u_2;
-      const double u1Mu2 = u_1 + u_2*-1.0;
-      const double u0Mu2 = u_0 + u_2*-1.0;
+      const double u1Mu2 = u_1 - u_2;
+      const double u0Mu2 = u_0 - u_2;
       const double u0Pu2 = u_0 + u_2;
-      const double f_eq_common = delta_rho - 1.0*(u_0*u_0) - 1.0*(u_1*u_1) - 1.0*(u_2*u_2);
-      _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = -1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir] + 2.0*((((dir) == (0))) ? (f_eq_common*0.33333333333333331): ((((dir) == (1)) || ((dir) == (2))) ? (delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + 0.33333333333333331*(u_1*u_1)): ((((dir) == (3)) || ((dir) == (4))) ? (delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + 0.33333333333333331*(u_0*u_0)): ((((dir) == (5)) || ((dir) == (6))) ? (delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + 0.33333333333333331*(u_2*u_2)): ((((dir) == (7))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)): ((((dir) == (8)) || ((dir) == (9))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1)): ((((dir) == (10))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)): ((((dir) == (11))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)): ((((dir) == (12))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)): ((((dir) == (13))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)): ((((dir) == (14))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)): ((((dir) == (15))) ? 
(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)): ((((dir) == (16))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)): ((((dir) == (17))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)): ((((dir) == (18))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)): (0.0))))))))))))))));
+      const double f_eq_common = delta_rho - u_0*u_0 - u_1*u_1 - u_2*u_2;
+      _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = 2.0*((((dir) == (0))) ? (f_eq_common*0.33333333333333331): ((((dir) == (1)) || ((dir) == (2))) ? (delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + 0.33333333333333331*(u_1*u_1)): ((((dir) == (3)) || ((dir) == (4))) ? (delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + 0.33333333333333331*(u_0*u_0)): ((((dir) == (5)) || ((dir) == (6))) ? (delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + 0.33333333333333331*(u_2*u_2)): ((((dir) == (7))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)): ((((dir) == (8)) || ((dir) == (9))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1)): ((((dir) == (10))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)): ((((dir) == (11))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)): ((((dir) == (12))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)): ((((dir) == (13))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)): ((((dir) == (14))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)): ((((dir) == (15))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)): ((((dir) == (16))) ? 
(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)): ((((dir) == (17))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)): ((((dir) == (18))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)): (0.0)))))))))))))))) - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir];
    }
 }
 }
 
-
+//NOLINTEND(readability-non-const-parameter*)
 #ifdef __GNUC__
 #pragma GCC diagnostic pop
 #endif
diff --git a/src/lbm_generated/boundary/FixedDensityD3Q19.h b/src/lbm_generated/boundary/FixedDensityD3Q19.h
index b4575d189724633c503fc0ba94a004c5b07ef9c2..2b6fa1639ecef0a176ac4e0bbfb5e270e4184d43 100644
--- a/src/lbm_generated/boundary/FixedDensityD3Q19.h
+++ b/src/lbm_generated/boundary/FixedDensityD3Q19.h
@@ -19,6 +19,7 @@
 
 #pragma once
 #include "core/DataTypes.h"
+#include "core/logging/Logging.h"
 
 #include "field/GhostLayerField.h"
 #include "domain_decomposition/BlockDataID.h"
@@ -40,6 +41,10 @@
 #define RESTRICT
 #endif
 
+#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
+using walberla::half;
+#endif
+
 namespace walberla {
 namespace lbm {
 
diff --git a/src/lbm_generated/boundary/FixedDensityD3Q27.cpp b/src/lbm_generated/boundary/FixedDensityD3Q27.cpp
index 3ff43bc5efa34a0ba88e8205440f46e5fa6db94b..f8ccd833c7bf8aa9083ade46a47bf30c667c48ce 100644
--- a/src/lbm_generated/boundary/FixedDensityD3Q27.cpp
+++ b/src/lbm_generated/boundary/FixedDensityD3Q27.cpp
@@ -45,9 +45,9 @@ namespace lbm {
 #pragma diag_suppress 177
 #endif
 #endif
-
+//NOLINTBEGIN(readability-non-const-parameter*)
 namespace internal_fixeddensityd3q27_even {
-static FUNC_PREFIX void fixeddensityd3q27_even(const uint8_t * RESTRICT const _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, double density, int32_t indexVectorSize)
+static FUNC_PREFIX void fixeddensityd3q27_even(uint8_t * RESTRICT const _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, double density, int32_t indexVectorSize)
 {
    
    const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13,26,25,24,23,22,21,20,19 }; 
@@ -59,28 +59,29 @@ static FUNC_PREFIX void fixeddensityd3q27_even(const uint8_t * RESTRICT const _d
    const double delta_rho = rho - 1.0;
    for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1)
    {
-      const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0]));
-      const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4]));
-      const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8]));
-      const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12]));
+      const int32_t x = *((int32_t *  )(& _data_indexVector[16*ctr_0]));
+      const int32_t y = *((int32_t *  )(& _data_indexVector[16*ctr_0 + 4]));
+      const int32_t z = *((int32_t *  )(& _data_indexVector[16*ctr_0 + 8]));
+      const int32_t dir = *((int32_t *  )(& _data_indexVector[16*ctr_0 + 12]));
       const double vel0Term = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 10*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 14*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 18*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 19*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 21*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 23*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 25*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 4*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 8*_stride_pdfs_3];
       const double vel1Term = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 15*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 20*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 24*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 7*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3];
       const double vel2Term = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 12*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 13*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 22*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 5*_stride_pdfs_3];
-      const double u_0 = vel0Term - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 13*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 17*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 20*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 22*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 24*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 26*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 3*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 7*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 9*_stride_pdfs_3];
-      const double u_1 = vel1Term - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 10*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 12*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 16*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 2*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 21*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 22*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 25*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 26*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 9*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 19*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 23*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 8*_stride_pdfs_3];
-      const double u_2 = vel2Term - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 15*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 16*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 17*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 18*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 23*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 24*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 25*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 26*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 6*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 14*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 19*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 20*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 21*_stride_pdfs_3];
-      const double u0Mu1 = u_0 + u_1*-1.0;
+      const double u_0 = vel0Term - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 13*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 20*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 22*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 24*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 26*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 3*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 7*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 9*_stride_pdfs_3];
+      const double u_1 = vel1Term - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 10*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 12*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 16*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 19*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 2*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 21*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 22*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 23*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 25*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 26*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 8*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 9*_stride_pdfs_3];
+      const double u_2 = vel2Term + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 14*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 15*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 16*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 18*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 19*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 20*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 21*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 23*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 24*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 25*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 26*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 6*_stride_pdfs_3];
+      const double u0Mu1 = u_0 - u_1;
       const double u0Pu1 = u_0 + u_1;
       const double u1Pu2 = u_1 + u_2;
-      const double u1Mu2 = u_1 + u_2*-1.0;
-      const double u0Mu2 = u_0 + u_2*-1.0;
+      const double u1Mu2 = u_1 - u_2;
+      const double u0Mu2 = u_0 - u_2;
       const double u0Pu2 = u_0 + u_2;
       const double f_eq_common = delta_rho - 1.5*(u_0*u_0) - 1.5*(u_1*u_1) - 1.5*(u_2*u_2);
-      _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = -1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir] + 2.0*((((dir) == (0))) ? (f_eq_common*0.29629629629629628): ((((dir) == (1)) || ((dir) == (2))) ? (f_eq_common*0.07407407407407407 + 0.33333333333333331*(u_1*u_1)): ((((dir) == (3)) || ((dir) == (4))) ? (f_eq_common*0.07407407407407407 + 0.33333333333333331*(u_0*u_0)): ((((dir) == (5)) || ((dir) == (6))) ? (f_eq_common*0.07407407407407407 + 0.33333333333333331*(u_2*u_2)): ((((dir) == (7))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Mu1*u0Mu1)): ((((dir) == (8)) || ((dir) == (9))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Pu1*u0Pu1)): ((((dir) == (10))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Mu1*u0Mu1)): ((((dir) == (11))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u1Pu2*u1Pu2)): ((((dir) == (12))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u1Mu2*u1Mu2)): ((((dir) == (13))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Mu2*u0Mu2)): ((((dir) == (14))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Pu2*u0Pu2)): ((((dir) == (15))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u1Mu2*u1Mu2)): ((((dir) == (16))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u1Pu2*u1Pu2)): ((((dir) == (17))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Pu2*u0Pu2)): ((((dir) == (18))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Mu2*u0Mu2)): ((((dir) == (19))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)): ((((dir) == (20))) ? 
(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)): ((((dir) == (21))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)): ((((dir) == (22)) || ((dir) == (23))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2)): ((((dir) == (24))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)): ((((dir) == (25))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)): ((((dir) == (26))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)): (0.0)))))))))))))))))))))));
+      _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = 2.0*((((dir) == (0))) ? (f_eq_common*0.29629629629629628): ((((dir) == (1)) || ((dir) == (2))) ? (f_eq_common*0.07407407407407407 + 0.33333333333333331*(u_1*u_1)): ((((dir) == (3)) || ((dir) == (4))) ? (f_eq_common*0.07407407407407407 + 0.33333333333333331*(u_0*u_0)): ((((dir) == (5)) || ((dir) == (6))) ? (f_eq_common*0.07407407407407407 + 0.33333333333333331*(u_2*u_2)): ((((dir) == (7))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Mu1*u0Mu1)): ((((dir) == (8)) || ((dir) == (9))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Pu1*u0Pu1)): ((((dir) == (10))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Mu1*u0Mu1)): ((((dir) == (11))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u1Pu2*u1Pu2)): ((((dir) == (12))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u1Mu2*u1Mu2)): ((((dir) == (13))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Mu2*u0Mu2)): ((((dir) == (14))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Pu2*u0Pu2)): ((((dir) == (15))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u1Mu2*u1Mu2)): ((((dir) == (16))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u1Pu2*u1Pu2)): ((((dir) == (17))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Pu2*u0Pu2)): ((((dir) == (18))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Mu2*u0Mu2)): ((((dir) == (19))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)): ((((dir) == (20))) ? 
(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)): ((((dir) == (21))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)): ((((dir) == (22)) || ((dir) == (23))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2)): ((((dir) == (24))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)): ((((dir) == (25))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)): ((((dir) == (26))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)): (0.0))))))))))))))))))))))) - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir];
    }
 }
 }
 
+//NOLINTEND(readability-non-const-parameter*)
 #ifdef __GNUC__
 #pragma GCC diagnostic pop
 #endif
diff --git a/src/lbm_generated/boundary/FixedDensityD3Q27.h b/src/lbm_generated/boundary/FixedDensityD3Q27.h
index 359540d25af2be0c78b85ad591c27aaba8d48de8..8efeb95c0e764447b0b85aaaea1d02812dbf4216 100644
--- a/src/lbm_generated/boundary/FixedDensityD3Q27.h
+++ b/src/lbm_generated/boundary/FixedDensityD3Q27.h
@@ -19,6 +19,7 @@
 
 #pragma once
 #include "core/DataTypes.h"
+#include "core/logging/Logging.h"
 
 #include "field/GhostLayerField.h"
 #include "domain_decomposition/BlockDataID.h"
@@ -40,6 +41,10 @@
 #define RESTRICT
 #endif
 
+#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
+using walberla::half;
+#endif
+
 namespace walberla {
 namespace lbm {
 
diff --git a/src/lbm_generated/boundary/FreeSlipD3Q19.cpp b/src/lbm_generated/boundary/FreeSlipD3Q19.cpp
index 2e3dc46580b5cbd0bdf533dd33742986ab13cd7f..3bee7fa2f377428848670b9c3b02c3a02a8f93d1 100644
--- a/src/lbm_generated/boundary/FreeSlipD3Q19.cpp
+++ b/src/lbm_generated/boundary/FreeSlipD3Q19.cpp
@@ -45,9 +45,9 @@ namespace lbm {
 #pragma diag_suppress 177
 #endif
 #endif
-
+//NOLINTBEGIN(readability-non-const-parameter*)
 namespace internal_freeslipd3q19_even {
-static FUNC_PREFIX void freeslipd3q19_even(const uint8_t * RESTRICT const _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
+static FUNC_PREFIX void freeslipd3q19_even(uint8_t * RESTRICT const _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
 {
    
    const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13 }; 
@@ -63,16 +63,16 @@ static FUNC_PREFIX void freeslipd3q19_even(const uint8_t * RESTRICT const _data_
    
    for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1)
    {
-      const int32_t x = *((int32_t * )(& _data_indexVector[32*ctr_0]));
-      const int32_t y = *((int32_t * )(& _data_indexVector[32*ctr_0 + 4]));
-      const int32_t z = *((int32_t * )(& _data_indexVector[32*ctr_0 + 8]));
-      const int32_t dir = *((int32_t * )(& _data_indexVector[32*ctr_0 + 12]));
-      _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 16])) + neighbour_offset_x[dir]) + _stride_pdfs_1*y + _stride_pdfs_1*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 20])) + neighbour_offset_y[dir]) + _stride_pdfs_2*z + _stride_pdfs_2*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 24])) + neighbour_offset_z[dir]) + _stride_pdfs_3**((int32_t * )(& _data_indexVector[32*ctr_0 + 28]))];
+      const int32_t x = *((int32_t *  )(& _data_indexVector[32*ctr_0]));
+      const int32_t y = *((int32_t *  )(& _data_indexVector[32*ctr_0 + 4]));
+      const int32_t z = *((int32_t *  )(& _data_indexVector[32*ctr_0 + 8]));
+      const int32_t dir = *((int32_t *  )(& _data_indexVector[32*ctr_0 + 12]));
+      _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*(*((int32_t *  )(& _data_indexVector[32*ctr_0 + 16])) + neighbour_offset_x[dir]) + _stride_pdfs_1*y + _stride_pdfs_1*(*((int32_t *  )(& _data_indexVector[32*ctr_0 + 20])) + neighbour_offset_y[dir]) + _stride_pdfs_2*z + _stride_pdfs_2*(*((int32_t *  )(& _data_indexVector[32*ctr_0 + 24])) + neighbour_offset_z[dir]) + _stride_pdfs_3**((int32_t *  )(& _data_indexVector[32*ctr_0 + 28]))];
    }
 }
 }
 
-
+//NOLINTEND(readability-non-const-parameter*)
 #ifdef __GNUC__
 #pragma GCC diagnostic pop
 #endif
diff --git a/src/lbm_generated/boundary/FreeSlipD3Q19.h b/src/lbm_generated/boundary/FreeSlipD3Q19.h
index 4679ffc4ff0cbf7cc5bfb07d1a9f9d9a7e775e2e..1c436dd3de043565c8f7add8239c851c161f1a7b 100644
--- a/src/lbm_generated/boundary/FreeSlipD3Q19.h
+++ b/src/lbm_generated/boundary/FreeSlipD3Q19.h
@@ -19,6 +19,7 @@
 
 #pragma once
 #include "core/DataTypes.h"
+#include "core/logging/Logging.h"
 
 #include "field/GhostLayerField.h"
 #include "domain_decomposition/BlockDataID.h"
@@ -40,6 +41,10 @@
 #define RESTRICT
 #endif
 
+#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
+using walberla::half;
+#endif
+
 namespace walberla {
 namespace lbm {
 
@@ -253,7 +258,7 @@ public:
                    element.wnx = 0;
                    element.wny = -1;
                    element.wnz = 0;
-                   ref_dir = 1;
+                   ref_dir = 2;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -301,7 +306,7 @@ public:
                    element.wnx = 0;
                    element.wny = 1;
                    element.wnz = 0;
-                   ref_dir = 2;
+                   ref_dir = 1;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -349,7 +354,7 @@ public:
                    element.wnx = 1;
                    element.wny = 0;
                    element.wnz = 0;
-                   ref_dir = 3;
+                   ref_dir = 4;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -397,7 +402,7 @@ public:
                    element.wnx = -1;
                    element.wny = 0;
                    element.wnz = 0;
-                   ref_dir = 4;
+                   ref_dir = 3;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -445,7 +450,7 @@ public:
                    element.wnx = 0;
                    element.wny = 0;
                    element.wnz = -1;
-                   ref_dir = 5;
+                   ref_dir = 6;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -493,7 +498,7 @@ public:
                    element.wnx = 0;
                    element.wny = 0;
                    element.wnz = 1;
-                   ref_dir = 6;
+                   ref_dir = 5;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -541,7 +546,7 @@ public:
                    element.wnx = 1;
                    element.wny = -1;
                    element.wnz = 0;
-                   ref_dir = 7;
+                   ref_dir = 10;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -589,7 +594,7 @@ public:
                    element.wnx = -1;
                    element.wny = -1;
                    element.wnz = 0;
-                   ref_dir = 8;
+                   ref_dir = 9;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -637,7 +642,7 @@ public:
                    element.wnx = 1;
                    element.wny = 1;
                    element.wnz = 0;
-                   ref_dir = 9;
+                   ref_dir = 8;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -685,7 +690,7 @@ public:
                    element.wnx = -1;
                    element.wny = 1;
                    element.wnz = 0;
-                   ref_dir = 10;
+                   ref_dir = 7;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -733,7 +738,7 @@ public:
                    element.wnx = 0;
                    element.wny = -1;
                    element.wnz = -1;
-                   ref_dir = 11;
+                   ref_dir = 16;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -781,7 +786,7 @@ public:
                    element.wnx = 0;
                    element.wny = 1;
                    element.wnz = -1;
-                   ref_dir = 12;
+                   ref_dir = 15;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -829,7 +834,7 @@ public:
                    element.wnx = 1;
                    element.wny = 0;
                    element.wnz = -1;
-                   ref_dir = 13;
+                   ref_dir = 18;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -877,7 +882,7 @@ public:
                    element.wnx = -1;
                    element.wny = 0;
                    element.wnz = -1;
-                   ref_dir = 14;
+                   ref_dir = 17;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -925,7 +930,7 @@ public:
                    element.wnx = 0;
                    element.wny = -1;
                    element.wnz = 1;
-                   ref_dir = 15;
+                   ref_dir = 12;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -973,7 +978,7 @@ public:
                    element.wnx = 0;
                    element.wny = 1;
                    element.wnz = 1;
-                   ref_dir = 16;
+                   ref_dir = 11;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -1021,7 +1026,7 @@ public:
                    element.wnx = 1;
                    element.wny = 0;
                    element.wnz = 1;
-                   ref_dir = 17;
+                   ref_dir = 14;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -1069,7 +1074,7 @@ public:
                    element.wnx = -1;
                    element.wny = 0;
                    element.wnz = 1;
-                   ref_dir = 18;
+                   ref_dir = 13;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
diff --git a/src/lbm_generated/boundary/FreeSlipD3Q27.cpp b/src/lbm_generated/boundary/FreeSlipD3Q27.cpp
index 3364610eec662874c88832e7ebedd144755ccf1a..e71e394966e87e8ee0d2e7a21130954c88b9576e 100644
--- a/src/lbm_generated/boundary/FreeSlipD3Q27.cpp
+++ b/src/lbm_generated/boundary/FreeSlipD3Q27.cpp
@@ -45,9 +45,9 @@ namespace lbm {
 #pragma diag_suppress 177
 #endif
 #endif
-
+//NOLINTBEGIN(readability-non-const-parameter*)
 namespace internal_freeslipd3q27_even {
-static FUNC_PREFIX void freeslipd3q27_even(const uint8_t * RESTRICT const _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
+static FUNC_PREFIX void freeslipd3q27_even(uint8_t * RESTRICT const _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
 {
    
    const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13,26,25,24,23,22,21,20,19 }; 
@@ -63,16 +63,16 @@ static FUNC_PREFIX void freeslipd3q27_even(const uint8_t * RESTRICT const _data_
    
    for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1)
    {
-      const int32_t x = *((int32_t * )(& _data_indexVector[32*ctr_0]));
-      const int32_t y = *((int32_t * )(& _data_indexVector[32*ctr_0 + 4]));
-      const int32_t z = *((int32_t * )(& _data_indexVector[32*ctr_0 + 8]));
-      const int32_t dir = *((int32_t * )(& _data_indexVector[32*ctr_0 + 12]));
-      _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 16])) + neighbour_offset_x[dir]) + _stride_pdfs_1*y + _stride_pdfs_1*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 20])) + neighbour_offset_y[dir]) + _stride_pdfs_2*z + _stride_pdfs_2*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 24])) + neighbour_offset_z[dir]) + _stride_pdfs_3**((int32_t * )(& _data_indexVector[32*ctr_0 + 28]))];
+      const int32_t x = *((int32_t *  )(& _data_indexVector[32*ctr_0]));
+      const int32_t y = *((int32_t *  )(& _data_indexVector[32*ctr_0 + 4]));
+      const int32_t z = *((int32_t *  )(& _data_indexVector[32*ctr_0 + 8]));
+      const int32_t dir = *((int32_t *  )(& _data_indexVector[32*ctr_0 + 12]));
+      _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*(*((int32_t *  )(& _data_indexVector[32*ctr_0 + 16])) + neighbour_offset_x[dir]) + _stride_pdfs_1*y + _stride_pdfs_1*(*((int32_t *  )(& _data_indexVector[32*ctr_0 + 20])) + neighbour_offset_y[dir]) + _stride_pdfs_2*z + _stride_pdfs_2*(*((int32_t *  )(& _data_indexVector[32*ctr_0 + 24])) + neighbour_offset_z[dir]) + _stride_pdfs_3**((int32_t *  )(& _data_indexVector[32*ctr_0 + 28]))];
    }
 }
 }
 
-
+//NOLINTEND(readability-non-const-parameter*)
 #ifdef __GNUC__
 #pragma GCC diagnostic pop
 #endif
diff --git a/src/lbm_generated/boundary/FreeSlipD3Q27.h b/src/lbm_generated/boundary/FreeSlipD3Q27.h
index 562dfbcadd6e98f88ece133ab724080f3488b77e..759f79100244d378b838a2b0034575f6ae37f584 100644
--- a/src/lbm_generated/boundary/FreeSlipD3Q27.h
+++ b/src/lbm_generated/boundary/FreeSlipD3Q27.h
@@ -19,6 +19,7 @@
 
 #pragma once
 #include "core/DataTypes.h"
+#include "core/logging/Logging.h"
 
 #include "field/GhostLayerField.h"
 #include "domain_decomposition/BlockDataID.h"
@@ -40,6 +41,10 @@
 #define RESTRICT
 #endif
 
+#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
+using walberla::half;
+#endif
+
 namespace walberla {
 namespace lbm {
 
@@ -253,7 +258,7 @@ public:
                    element.wnx = 0;
                    element.wny = -1;
                    element.wnz = 0;
-                   ref_dir = 1;
+                   ref_dir = 2;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -301,7 +306,7 @@ public:
                    element.wnx = 0;
                    element.wny = 1;
                    element.wnz = 0;
-                   ref_dir = 2;
+                   ref_dir = 1;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -349,7 +354,7 @@ public:
                    element.wnx = 1;
                    element.wny = 0;
                    element.wnz = 0;
-                   ref_dir = 3;
+                   ref_dir = 4;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -397,7 +402,7 @@ public:
                    element.wnx = -1;
                    element.wny = 0;
                    element.wnz = 0;
-                   ref_dir = 4;
+                   ref_dir = 3;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -445,7 +450,7 @@ public:
                    element.wnx = 0;
                    element.wny = 0;
                    element.wnz = -1;
-                   ref_dir = 5;
+                   ref_dir = 6;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -493,7 +498,7 @@ public:
                    element.wnx = 0;
                    element.wny = 0;
                    element.wnz = 1;
-                   ref_dir = 6;
+                   ref_dir = 5;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -541,7 +546,7 @@ public:
                    element.wnx = 1;
                    element.wny = -1;
                    element.wnz = 0;
-                   ref_dir = 7;
+                   ref_dir = 10;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -589,7 +594,7 @@ public:
                    element.wnx = -1;
                    element.wny = -1;
                    element.wnz = 0;
-                   ref_dir = 8;
+                   ref_dir = 9;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -637,7 +642,7 @@ public:
                    element.wnx = 1;
                    element.wny = 1;
                    element.wnz = 0;
-                   ref_dir = 9;
+                   ref_dir = 8;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -685,7 +690,7 @@ public:
                    element.wnx = -1;
                    element.wny = 1;
                    element.wnz = 0;
-                   ref_dir = 10;
+                   ref_dir = 7;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -733,7 +738,7 @@ public:
                    element.wnx = 0;
                    element.wny = -1;
                    element.wnz = -1;
-                   ref_dir = 11;
+                   ref_dir = 16;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -781,7 +786,7 @@ public:
                    element.wnx = 0;
                    element.wny = 1;
                    element.wnz = -1;
-                   ref_dir = 12;
+                   ref_dir = 15;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -829,7 +834,7 @@ public:
                    element.wnx = 1;
                    element.wny = 0;
                    element.wnz = -1;
-                   ref_dir = 13;
+                   ref_dir = 18;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -877,7 +882,7 @@ public:
                    element.wnx = -1;
                    element.wny = 0;
                    element.wnz = -1;
-                   ref_dir = 14;
+                   ref_dir = 17;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -925,7 +930,7 @@ public:
                    element.wnx = 0;
                    element.wny = -1;
                    element.wnz = 1;
-                   ref_dir = 15;
+                   ref_dir = 12;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -973,7 +978,7 @@ public:
                    element.wnx = 0;
                    element.wny = 1;
                    element.wnz = 1;
-                   ref_dir = 16;
+                   ref_dir = 11;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -1021,7 +1026,7 @@ public:
                    element.wnx = 1;
                    element.wny = 0;
                    element.wnz = 1;
-                   ref_dir = 17;
+                   ref_dir = 14;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -1069,7 +1074,7 @@ public:
                    element.wnx = -1;
                    element.wny = 0;
                    element.wnz = 1;
-                   ref_dir = 18;
+                   ref_dir = 13;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -1117,7 +1122,7 @@ public:
                    element.wnx = -1;
                    element.wny = -1;
                    element.wnz = -1;
-                   ref_dir = 19;
+                   ref_dir = 26;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -1165,7 +1170,7 @@ public:
                    element.wnx = 1;
                    element.wny = -1;
                    element.wnz = -1;
-                   ref_dir = 20;
+                   ref_dir = 25;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -1213,7 +1218,7 @@ public:
                    element.wnx = -1;
                    element.wny = 1;
                    element.wnz = -1;
-                   ref_dir = 21;
+                   ref_dir = 24;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -1261,7 +1266,7 @@ public:
                    element.wnx = 1;
                    element.wny = 1;
                    element.wnz = -1;
-                   ref_dir = 22;
+                   ref_dir = 23;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -1309,7 +1314,7 @@ public:
                    element.wnx = -1;
                    element.wny = -1;
                    element.wnz = 1;
-                   ref_dir = 23;
+                   ref_dir = 22;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -1357,7 +1362,7 @@ public:
                    element.wnx = 1;
                    element.wny = -1;
                    element.wnz = 1;
-                   ref_dir = 24;
+                   ref_dir = 21;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -1405,7 +1410,7 @@ public:
                    element.wnx = -1;
                    element.wny = 1;
                    element.wnz = 1;
-                   ref_dir = 25;
+                   ref_dir = 20;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
@@ -1453,7 +1458,7 @@ public:
                    element.wnx = 1;
                    element.wny = 1;
                    element.wnz = 1;
-                   ref_dir = 26;
+                   ref_dir = 19;
                 }
                 element.ref_dir = ref_dir;
               indexVectorAll.push_back( element );
diff --git a/src/lbm_generated/boundary/NoSlipD3Q19.cpp b/src/lbm_generated/boundary/NoSlipD3Q19.cpp
index 268cbf43361645c8e7886f6abd86a56089a75fff..b56a975ab78dcdb31040b2b317b7757b188e7274 100644
--- a/src/lbm_generated/boundary/NoSlipD3Q19.cpp
+++ b/src/lbm_generated/boundary/NoSlipD3Q19.cpp
@@ -45,9 +45,9 @@ namespace lbm {
 #pragma diag_suppress 177
 #endif
 #endif
-
+//NOLINTBEGIN(readability-non-const-parameter*)
 namespace internal_noslipd3q19_even {
-static FUNC_PREFIX void noslipd3q19_even(const uint8_t * RESTRICT const _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
+static FUNC_PREFIX void noslipd3q19_even(uint8_t * RESTRICT const _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
 {
    
    const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13 }; 
@@ -57,15 +57,16 @@ static FUNC_PREFIX void noslipd3q19_even(const uint8_t * RESTRICT const _data_in
    
    for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1)
    {
-      const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0]));
-      const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4]));
-      const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8]));
-      const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12]));
+      const int32_t x = *((int32_t *  )(& _data_indexVector[16*ctr_0]));
+      const int32_t y = *((int32_t *  )(& _data_indexVector[16*ctr_0 + 4]));
+      const int32_t z = *((int32_t *  )(& _data_indexVector[16*ctr_0 + 8]));
+      const int32_t dir = *((int32_t *  )(& _data_indexVector[16*ctr_0 + 12]));
       _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir];
    }
 }
 }
 
+//NOLINTEND(readability-non-const-parameter*)
 #ifdef __GNUC__
 #pragma GCC diagnostic pop
 #endif
diff --git a/src/lbm_generated/boundary/NoSlipD3Q19.h b/src/lbm_generated/boundary/NoSlipD3Q19.h
index 933108eec5fdcdeee8e0af6abb90617fc149307e..7541e7a9c24f542b63e2870012d8078b631935fa 100644
--- a/src/lbm_generated/boundary/NoSlipD3Q19.h
+++ b/src/lbm_generated/boundary/NoSlipD3Q19.h
@@ -19,6 +19,7 @@
 
 #pragma once
 #include "core/DataTypes.h"
+#include "core/logging/Logging.h"
 
 #include "field/GhostLayerField.h"
 #include "domain_decomposition/BlockDataID.h"
@@ -40,6 +41,10 @@
 #define RESTRICT
 #endif
 
+#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
+using walberla::half;
+#endif
+
 namespace walberla {
 namespace lbm {
 
diff --git a/src/lbm_generated/boundary/NoSlipD3Q27.cpp b/src/lbm_generated/boundary/NoSlipD3Q27.cpp
index c38bee8122daa4ee1d09b1b861e5729d232bf310..9adee2246796ee70f3551afb98598b901ba08ea1 100644
--- a/src/lbm_generated/boundary/NoSlipD3Q27.cpp
+++ b/src/lbm_generated/boundary/NoSlipD3Q27.cpp
@@ -45,9 +45,9 @@ namespace lbm {
 #pragma diag_suppress 177
 #endif
 #endif
-
+//NOLINTBEGIN(readability-non-const-parameter*)
 namespace internal_noslipd3q27_even {
-static FUNC_PREFIX void noslipd3q27_even(const uint8_t * RESTRICT const _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
+static FUNC_PREFIX void noslipd3q27_even(uint8_t * RESTRICT const _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
 {
    
    const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13,26,25,24,23,22,21,20,19 }; 
@@ -57,16 +57,16 @@ static FUNC_PREFIX void noslipd3q27_even(const uint8_t * RESTRICT const _data_in
    
    for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1)
    {
-      const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0]));
-      const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4]));
-      const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8]));
-      const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12]));
+      const int32_t x = *((int32_t *  )(& _data_indexVector[16*ctr_0]));
+      const int32_t y = *((int32_t *  )(& _data_indexVector[16*ctr_0 + 4]));
+      const int32_t z = *((int32_t *  )(& _data_indexVector[16*ctr_0 + 8]));
+      const int32_t dir = *((int32_t *  )(& _data_indexVector[16*ctr_0 + 12]));
       _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir];
    }
 }
 }
 
-
+//NOLINTEND(readability-non-const-parameter*)
 #ifdef __GNUC__
 #pragma GCC diagnostic pop
 #endif
diff --git a/src/lbm_generated/boundary/NoSlipD3Q27.h b/src/lbm_generated/boundary/NoSlipD3Q27.h
index 56bbfb0611d6a506b3ed4558c388b3d9ed65d443..de0a6a35a8db3be880581f1b93f4b0865b501ae1 100644
--- a/src/lbm_generated/boundary/NoSlipD3Q27.h
+++ b/src/lbm_generated/boundary/NoSlipD3Q27.h
@@ -19,6 +19,7 @@
 
 #pragma once
 #include "core/DataTypes.h"
+#include "core/logging/Logging.h"
 
 #include "field/GhostLayerField.h"
 #include "domain_decomposition/BlockDataID.h"
@@ -40,6 +41,10 @@
 #define RESTRICT
 #endif
 
+#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
+using walberla::half;
+#endif
+
 namespace walberla {
 namespace lbm {
 
diff --git a/src/lbm_generated/boundary/OutflowD3Q19.cpp b/src/lbm_generated/boundary/OutflowD3Q19.cpp
index d42cf90429601d5ed4809c30b8926548d8bf6618..13e14a59f1956c7899102dd585d64ef72c740a0b 100644
--- a/src/lbm_generated/boundary/OutflowD3Q19.cpp
+++ b/src/lbm_generated/boundary/OutflowD3Q19.cpp
@@ -45,13 +45,13 @@ namespace lbm {
 #pragma diag_suppress 177
 #endif
 #endif
-
+//NOLINTBEGIN(readability-non-const-parameter*)
 namespace internal_outflowd3q19_even {
-static FUNC_PREFIX void outflowd3q19_even(const uint8_t * RESTRICT  _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
+static FUNC_PREFIX void outflowd3q19_even(uint8_t * RESTRICT  _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
 {
    
-   const int32_t f_out_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13 }; 
    const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13 }; 
+   const int32_t f_out_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13 }; 
    const int32_t f_in_inv_offsets_x [] = { 0,0,0,-1,1,0,0,-1,1,-1,1,0,0,-1,1,0,0,-1,1 }; 
    const int32_t f_in_inv_offsets_y [] = { 0,1,-1,0,0,0,0,1,1,-1,-1,1,-1,0,0,1,-1,0,0 }; 
    const int32_t f_in_inv_offsets_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1 }; 
@@ -64,19 +64,19 @@ static FUNC_PREFIX void outflowd3q19_even(const uint8_t * RESTRICT  _data_indexV
    
    for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1)
    {
-      const int32_t x = *((int32_t * )(& _data_indexVector[32*ctr_0]));
-      const int32_t y = *((int32_t * )(& _data_indexVector[32*ctr_0 + 4]));
-      const int32_t z = *((int32_t * )(& _data_indexVector[32*ctr_0 + 8]));
-      const int32_t dir = *((int32_t * )(& _data_indexVector[32*ctr_0 + 12]));
-      const double pdf_inter = 0.42264973081037427**((double * )(& _data_indexVector[32*ctr_0 + 24])) + 0.57735026918962573**((double * )(& _data_indexVector[32*ctr_0 + 16]));
+      const int32_t x = *((int32_t *  )(& _data_indexVector[32*ctr_0]));
+      const int32_t y = *((int32_t *  )(& _data_indexVector[32*ctr_0 + 4]));
+      const int32_t z = *((int32_t *  )(& _data_indexVector[32*ctr_0 + 8]));
+      const int32_t dir = *((int32_t *  )(& _data_indexVector[32*ctr_0 + 12]));
+      const double pdf_inter = 0.42264973081037427**((double *  )(& _data_indexVector[32*ctr_0 + 24])) + 0.57735026918962573**((double *  )(& _data_indexVector[32*ctr_0 + 16]));
       _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = pdf_inter;
-      *((double * )(& _data_indexVector[32*ctr_0 + 16])) = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*(neighbour_offset_x[dir] - 1) + _stride_pdfs_1*y + _stride_pdfs_1*neighbour_offset_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*neighbour_offset_z[dir] + _stride_pdfs_3*f_out_inv_dir_idx[dir]];
-      *((double * )(& _data_indexVector[32*ctr_0 + 24])) = pdf_inter;
+      *((double *  )(& _data_indexVector[32*ctr_0 + 16])) = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*(neighbour_offset_x[dir] - 1) + _stride_pdfs_1*y + _stride_pdfs_1*neighbour_offset_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*neighbour_offset_z[dir] + _stride_pdfs_3*f_out_inv_dir_idx[dir]];
+      *((double *  )(& _data_indexVector[32*ctr_0 + 24])) = pdf_inter;
    }
 }
 }
 
-
+//NOLINTEND(readability-non-const-parameter*)
 #ifdef __GNUC__
 #pragma GCC diagnostic pop
 #endif
diff --git a/src/lbm_generated/boundary/OutflowD3Q19.h b/src/lbm_generated/boundary/OutflowD3Q19.h
index bb2999966556997e70c9f469e65062951276a601..bcde26bfee3c78e1fe879df99592f9754a309075 100644
--- a/src/lbm_generated/boundary/OutflowD3Q19.h
+++ b/src/lbm_generated/boundary/OutflowD3Q19.h
@@ -19,6 +19,7 @@
 
 #pragma once
 #include "core/DataTypes.h"
+#include "core/logging/Logging.h"
 
 #include "field/GhostLayerField.h"
 #include "domain_decomposition/BlockDataID.h"
@@ -40,6 +41,10 @@
 #define RESTRICT
 #endif
 
+#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
+using walberla::half;
+#endif
+
 namespace walberla {
 namespace lbm {
 
@@ -174,8 +179,8 @@ public:
            if ( isFlagSet( it.neighbor(1, 0, 0 , 0 ), boundaryFlag ) )
            {
               auto element = IndexInfo(it.x(), it.y(),  it.z(),  4 );
-              element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(0), 3);
-                element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(0), 3);
+              element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(0), 3) );
+                element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(0), 3) );
               indexVectorAll.push_back( element );
               if( inner.contains( it.x(), it.y(), it.z() ) )
                  indexVectorInner.push_back( element );
@@ -192,8 +197,8 @@ public:
            if ( isFlagSet( it.neighbor(1, 1, 0 , 0 ), boundaryFlag ) )
            {
               auto element = IndexInfo(it.x(), it.y(),  it.z(),  8 );
-              element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(0), 9);
-                element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(0), 9);
+              element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(0), 9) );
+                element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(0), 9) );
               indexVectorAll.push_back( element );
               if( inner.contains( it.x(), it.y(), it.z() ) )
                  indexVectorInner.push_back( element );
@@ -210,8 +215,8 @@ public:
            if ( isFlagSet( it.neighbor(1, -1, 0 , 0 ), boundaryFlag ) )
            {
               auto element = IndexInfo(it.x(), it.y(),  it.z(),  10 );
-              element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(0), 7);
-                element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(0), 7);
+              element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(0), 7) );
+                element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(0), 7) );
               indexVectorAll.push_back( element );
               if( inner.contains( it.x(), it.y(), it.z() ) )
                  indexVectorInner.push_back( element );
@@ -228,8 +233,8 @@ public:
            if ( isFlagSet( it.neighbor(1, 0, 1 , 0 ), boundaryFlag ) )
            {
               auto element = IndexInfo(it.x(), it.y(),  it.z(),  14 );
-              element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(1), 17);
-                element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(1), 17);
+              element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(1), 17) );
+                element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(1), 17) );
               indexVectorAll.push_back( element );
               if( inner.contains( it.x(), it.y(), it.z() ) )
                  indexVectorInner.push_back( element );
@@ -246,8 +251,8 @@ public:
            if ( isFlagSet( it.neighbor(1, 0, -1 , 0 ), boundaryFlag ) )
            {
               auto element = IndexInfo(it.x(), it.y(),  it.z(),  18 );
-              element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(-1), 13);
-                element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(-1), 13);
+              element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(-1), 13) );
+                element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(-1), 13) );
               indexVectorAll.push_back( element );
               if( inner.contains( it.x(), it.y(), it.z() ) )
                  indexVectorInner.push_back( element );
diff --git a/src/lbm_generated/boundary/OutflowD3Q27.cpp b/src/lbm_generated/boundary/OutflowD3Q27.cpp
index 8ec9a490b443740ff1ae24adfa1a1739261311a2..70926d565e4964cb937d3b9a4133ef008d141ce6 100644
--- a/src/lbm_generated/boundary/OutflowD3Q27.cpp
+++ b/src/lbm_generated/boundary/OutflowD3Q27.cpp
@@ -45,13 +45,13 @@ namespace lbm {
 #pragma diag_suppress 177
 #endif
 #endif
-
+//NOLINTBEGIN(readability-non-const-parameter*)
 namespace internal_outflowd3q27_even {
-static FUNC_PREFIX void outflowd3q27_even(const uint8_t * RESTRICT  _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
+static FUNC_PREFIX void outflowd3q27_even(uint8_t * RESTRICT  _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
 {
    
-   const int32_t f_out_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13,26,25,24,23,22,21,20,19 }; 
    const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13,26,25,24,23,22,21,20,19 }; 
+   const int32_t f_out_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13,26,25,24,23,22,21,20,19 }; 
    const int32_t f_in_inv_offsets_x [] = { 0,0,0,-1,1,0,0,-1,1,-1,1,0,0,-1,1,0,0,-1,1,1,-1,1,-1,1,-1,1,-1 }; 
    const int32_t f_in_inv_offsets_y [] = { 0,1,-1,0,0,0,0,1,1,-1,-1,1,-1,0,0,1,-1,0,0,1,1,-1,-1,1,1,-1,-1 }; 
    const int32_t f_in_inv_offsets_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1,1,1,1,1,-1,-1,-1,-1 }; 
@@ -64,19 +64,19 @@ static FUNC_PREFIX void outflowd3q27_even(const uint8_t * RESTRICT  _data_indexV
    
    for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1)
    {
-      const int32_t x = *((int32_t * )(& _data_indexVector[32*ctr_0]));
-      const int32_t y = *((int32_t * )(& _data_indexVector[32*ctr_0 + 4]));
-      const int32_t z = *((int32_t * )(& _data_indexVector[32*ctr_0 + 8]));
-      const int32_t dir = *((int32_t * )(& _data_indexVector[32*ctr_0 + 12]));
-      const double pdf_inter = 0.42264973081037427**((double * )(& _data_indexVector[32*ctr_0 + 24])) + 0.57735026918962573**((double * )(& _data_indexVector[32*ctr_0 + 16]));
+      const int32_t x = *((int32_t *  )(& _data_indexVector[32*ctr_0]));
+      const int32_t y = *((int32_t *  )(& _data_indexVector[32*ctr_0 + 4]));
+      const int32_t z = *((int32_t *  )(& _data_indexVector[32*ctr_0 + 8]));
+      const int32_t dir = *((int32_t *  )(& _data_indexVector[32*ctr_0 + 12]));
+      const double pdf_inter = 0.42264973081037427**((double *  )(& _data_indexVector[32*ctr_0 + 24])) + 0.57735026918962573**((double *  )(& _data_indexVector[32*ctr_0 + 16]));
       _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = pdf_inter;
-      *((double * )(& _data_indexVector[32*ctr_0 + 16])) = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*(neighbour_offset_x[dir] - 1) + _stride_pdfs_1*y + _stride_pdfs_1*neighbour_offset_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*neighbour_offset_z[dir] + _stride_pdfs_3*f_out_inv_dir_idx[dir]];
-      *((double * )(& _data_indexVector[32*ctr_0 + 24])) = pdf_inter;
+      *((double *  )(& _data_indexVector[32*ctr_0 + 16])) = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*(neighbour_offset_x[dir] - 1) + _stride_pdfs_1*y + _stride_pdfs_1*neighbour_offset_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*neighbour_offset_z[dir] + _stride_pdfs_3*f_out_inv_dir_idx[dir]];
+      *((double *  )(& _data_indexVector[32*ctr_0 + 24])) = pdf_inter;
    }
 }
 }
 
-
+//NOLINTEND(readability-non-const-parameter*)
 #ifdef __GNUC__
 #pragma GCC diagnostic pop
 #endif
diff --git a/src/lbm_generated/boundary/OutflowD3Q27.h b/src/lbm_generated/boundary/OutflowD3Q27.h
index 53b4e4bae5e6c6da6b4b108751120bf90a5ab25b..e62365e4733fa00988d061b45734d703976897b6 100644
--- a/src/lbm_generated/boundary/OutflowD3Q27.h
+++ b/src/lbm_generated/boundary/OutflowD3Q27.h
@@ -19,6 +19,7 @@
 
 #pragma once
 #include "core/DataTypes.h"
+#include "core/logging/Logging.h"
 
 #include "field/GhostLayerField.h"
 #include "domain_decomposition/BlockDataID.h"
@@ -40,6 +41,10 @@
 #define RESTRICT
 #endif
 
+#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
+using walberla::half;
+#endif
+
 namespace walberla {
 namespace lbm {
 
@@ -174,8 +179,8 @@ public:
            if ( isFlagSet( it.neighbor(1, 0, 0 , 0 ), boundaryFlag ) )
            {
               auto element = IndexInfo(it.x(), it.y(),  it.z(),  4 );
-              element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(0), 3);
-                element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(0), 3);
+              element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(0), 3) );
+                element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(0), 3) );
               indexVectorAll.push_back( element );
               if( inner.contains( it.x(), it.y(), it.z() ) )
                  indexVectorInner.push_back( element );
@@ -192,8 +197,8 @@ public:
            if ( isFlagSet( it.neighbor(1, 1, 0 , 0 ), boundaryFlag ) )
            {
               auto element = IndexInfo(it.x(), it.y(),  it.z(),  8 );
-              element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(0), 9);
-                element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(0), 9);
+              element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(0), 9) );
+                element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(0), 9) );
               indexVectorAll.push_back( element );
               if( inner.contains( it.x(), it.y(), it.z() ) )
                  indexVectorInner.push_back( element );
@@ -210,8 +215,8 @@ public:
            if ( isFlagSet( it.neighbor(1, -1, 0 , 0 ), boundaryFlag ) )
            {
               auto element = IndexInfo(it.x(), it.y(),  it.z(),  10 );
-              element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(0), 7);
-                element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(0), 7);
+              element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(0), 7) );
+                element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(0), 7) );
               indexVectorAll.push_back( element );
               if( inner.contains( it.x(), it.y(), it.z() ) )
                  indexVectorInner.push_back( element );
@@ -228,8 +233,8 @@ public:
            if ( isFlagSet( it.neighbor(1, 0, 1 , 0 ), boundaryFlag ) )
            {
               auto element = IndexInfo(it.x(), it.y(),  it.z(),  14 );
-              element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(1), 17);
-                element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(1), 17);
+              element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(1), 17) );
+                element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(1), 17) );
               indexVectorAll.push_back( element );
               if( inner.contains( it.x(), it.y(), it.z() ) )
                  indexVectorInner.push_back( element );
@@ -246,8 +251,8 @@ public:
            if ( isFlagSet( it.neighbor(1, 0, -1 , 0 ), boundaryFlag ) )
            {
               auto element = IndexInfo(it.x(), it.y(),  it.z(),  18 );
-              element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(-1), 13);
-                element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(-1), 13);
+              element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(-1), 13) );
+                element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(-1), 13) );
               indexVectorAll.push_back( element );
               if( inner.contains( it.x(), it.y(), it.z() ) )
                  indexVectorInner.push_back( element );
@@ -264,8 +269,8 @@ public:
            if ( isFlagSet( it.neighbor(1, 1, 1 , 0 ), boundaryFlag ) )
            {
               auto element = IndexInfo(it.x(), it.y(),  it.z(),  19 );
-              element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(1), 26);
-                element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(1), 26);
+              element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(1), 26) );
+                element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(1), 26) );
               indexVectorAll.push_back( element );
               if( inner.contains( it.x(), it.y(), it.z() ) )
                  indexVectorInner.push_back( element );
@@ -282,8 +287,8 @@ public:
            if ( isFlagSet( it.neighbor(1, -1, 1 , 0 ), boundaryFlag ) )
            {
               auto element = IndexInfo(it.x(), it.y(),  it.z(),  21 );
-              element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(1), 24);
-                element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(1), 24);
+              element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(1), 24) );
+                element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(1), 24) );
               indexVectorAll.push_back( element );
               if( inner.contains( it.x(), it.y(), it.z() ) )
                  indexVectorInner.push_back( element );
@@ -300,8 +305,8 @@ public:
            if ( isFlagSet( it.neighbor(1, 1, -1 , 0 ), boundaryFlag ) )
            {
               auto element = IndexInfo(it.x(), it.y(),  it.z(),  23 );
-              element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(-1), 22);
-                element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(-1), 22);
+              element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(-1), 22) );
+                element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(-1), 22) );
               indexVectorAll.push_back( element );
               if( inner.contains( it.x(), it.y(), it.z() ) )
                  indexVectorInner.push_back( element );
@@ -318,8 +323,8 @@ public:
            if ( isFlagSet( it.neighbor(1, -1, -1 , 0 ), boundaryFlag ) )
            {
               auto element = IndexInfo(it.x(), it.y(),  it.z(),  25 );
-              element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(-1), 20);
-                element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(-1), 20);
+              element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(-1), 20) );
+                element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(-1), 20) );
               indexVectorAll.push_back( element );
               if( inner.contains( it.x(), it.y(), it.z() ) )
                  indexVectorInner.push_back( element );
diff --git a/src/lbm_generated/boundary/UBBD3Q19.cpp b/src/lbm_generated/boundary/UBBD3Q19.cpp
index 0a88d2feeff0237881df80f6494a4f58f8936e02..7de5a364b428df4ec00b2d11beb40b676b820441 100644
--- a/src/lbm_generated/boundary/UBBD3Q19.cpp
+++ b/src/lbm_generated/boundary/UBBD3Q19.cpp
@@ -45,9 +45,9 @@ namespace lbm {
 #pragma diag_suppress 177
 #endif
 #endif
-
+//NOLINTBEGIN(readability-non-const-parameter*)
 namespace internal_ubbd3q19_even {
-static FUNC_PREFIX void ubbd3q19_even(const uint8_t * RESTRICT const _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize, double u_x, double u_y, double u_z)
+static FUNC_PREFIX void ubbd3q19_even(uint8_t * RESTRICT const _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize, double u_x, double u_y, double u_z)
 {
    
    const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13 }; 
@@ -56,7 +56,7 @@ static FUNC_PREFIX void ubbd3q19_even(const uint8_t * RESTRICT const _data_index
    const int32_t f_in_inv_offsets_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1 }; 
    
    
-   const double weights [] = {0.33333333333333333, 0.055555555555555556, 0.055555555555555556, 0.055555555555555556, 0.055555555555555556, 0.055555555555555556, 0.055555555555555556, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778};
+   const double weights [] = {((double)(0.33333333333333333)), ((double)(0.055555555555555556)), ((double)(0.055555555555555556)), ((double)(0.055555555555555556)), ((double)(0.055555555555555556)), ((double)(0.055555555555555556)), ((double)(0.055555555555555556)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778))};
    
    
    
@@ -66,15 +66,16 @@ static FUNC_PREFIX void ubbd3q19_even(const uint8_t * RESTRICT const _data_index
    
    for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1)
    {
-      const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0]));
-      const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4]));
-      const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8]));
-      const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12]));
-      _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = (u_x*6.0*((double)(neighbour_offset_x[dir])) + u_y*6.0*((double)(neighbour_offset_y[dir])) + u_z*6.0*((double)(neighbour_offset_z[dir])))*-1.0*weights[dir] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir];
+      const int32_t x = *((int32_t *  )(& _data_indexVector[16*ctr_0]));
+      const int32_t y = *((int32_t *  )(& _data_indexVector[16*ctr_0 + 4]));
+      const int32_t z = *((int32_t *  )(& _data_indexVector[16*ctr_0 + 8]));
+      const int32_t dir = *((int32_t *  )(& _data_indexVector[16*ctr_0 + 12]));
+      _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = -(u_x*6.0*((double)(neighbour_offset_x[dir])) + u_y*6.0*((double)(neighbour_offset_y[dir])) + u_z*6.0*((double)(neighbour_offset_z[dir])))*weights[dir] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir];
    }
 }
 }
 
+//NOLINTEND(readability-non-const-parameter*)
 #ifdef __GNUC__
 #pragma GCC diagnostic pop
 #endif
@@ -101,8 +102,8 @@ void UBBD3Q19::run_impl(IBlock * block, IndexVectors::Type type)
 
    uint8_t timestep = pdfs->getTimestep();
    auto & u_y = u_y_;
-    auto & u_x = u_x_;
     auto & u_z = u_z_;
+    auto & u_x = u_x_;
    WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()))
     double * RESTRICT  _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
     const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
diff --git a/src/lbm_generated/boundary/UBBD3Q19.h b/src/lbm_generated/boundary/UBBD3Q19.h
index f57bac12d404b9b3d8819d7955dc65c3cdbcab61..592d000079c9630a0b538346d2897399c2ffc094 100644
--- a/src/lbm_generated/boundary/UBBD3Q19.h
+++ b/src/lbm_generated/boundary/UBBD3Q19.h
@@ -19,6 +19,7 @@
 
 #pragma once
 #include "core/DataTypes.h"
+#include "core/logging/Logging.h"
 
 #include "field/GhostLayerField.h"
 #include "domain_decomposition/BlockDataID.h"
@@ -40,6 +41,10 @@
 #define RESTRICT
 #endif
 
+#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
+using walberla::half;
+#endif
+
 namespace walberla {
 namespace lbm {
 
diff --git a/src/lbm_generated/boundary/UBBD3Q27.cpp b/src/lbm_generated/boundary/UBBD3Q27.cpp
index 08ee3ef38ef4460b590216b789caea5457da8b97..b2cc66552d5e39c84c90bc61d2faaf77de59728c 100644
--- a/src/lbm_generated/boundary/UBBD3Q27.cpp
+++ b/src/lbm_generated/boundary/UBBD3Q27.cpp
@@ -45,9 +45,9 @@ namespace lbm {
 #pragma diag_suppress 177
 #endif
 #endif
-
+//NOLINTBEGIN(readability-non-const-parameter*)
 namespace internal_ubbd3q27_even {
-static FUNC_PREFIX void ubbd3q27_even(const uint8_t * RESTRICT const _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize, double u_x, double u_y, double u_z)
+static FUNC_PREFIX void ubbd3q27_even(uint8_t * RESTRICT const _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize, double u_x, double u_y, double u_z)
 {
    
    const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13,26,25,24,23,22,21,20,19 }; 
@@ -56,7 +56,7 @@ static FUNC_PREFIX void ubbd3q27_even(const uint8_t * RESTRICT const _data_index
    const int32_t f_in_inv_offsets_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1,1,1,1,1,-1,-1,-1,-1 }; 
    
    
-   const double weights [] = {0.29629629629629630, 0.074074074074074074, 0.074074074074074074, 0.074074074074074074, 0.074074074074074074, 0.074074074074074074, 0.074074074074074074, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.0046296296296296296, 0.0046296296296296296, 0.0046296296296296296, 0.0046296296296296296, 0.0046296296296296296, 0.0046296296296296296, 0.0046296296296296296, 0.0046296296296296296};
+   const double weights [] = {((double)(0.29629629629629630)), ((double)(0.074074074074074074)), ((double)(0.074074074074074074)), ((double)(0.074074074074074074)), ((double)(0.074074074074074074)), ((double)(0.074074074074074074)), ((double)(0.074074074074074074)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.0046296296296296296)), ((double)(0.0046296296296296296)), ((double)(0.0046296296296296296)), ((double)(0.0046296296296296296)), ((double)(0.0046296296296296296)), ((double)(0.0046296296296296296)), ((double)(0.0046296296296296296)), ((double)(0.0046296296296296296))};
    
    
    
@@ -66,16 +66,16 @@ static FUNC_PREFIX void ubbd3q27_even(const uint8_t * RESTRICT const _data_index
    
    for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1)
    {
-      const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0]));
-      const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4]));
-      const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8]));
-      const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12]));
-      _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = (u_x*6.0*((double)(neighbour_offset_x[dir])) + u_y*6.0*((double)(neighbour_offset_y[dir])) + u_z*6.0*((double)(neighbour_offset_z[dir])))*-1.0*weights[dir] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir];
+      const int32_t x = *((int32_t *  )(& _data_indexVector[16*ctr_0]));
+      const int32_t y = *((int32_t *  )(& _data_indexVector[16*ctr_0 + 4]));
+      const int32_t z = *((int32_t *  )(& _data_indexVector[16*ctr_0 + 8]));
+      const int32_t dir = *((int32_t *  )(& _data_indexVector[16*ctr_0 + 12]));
+      _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = -(u_x*6.0*((double)(neighbour_offset_x[dir])) + u_y*6.0*((double)(neighbour_offset_y[dir])) + u_z*6.0*((double)(neighbour_offset_z[dir])))*weights[dir] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir];
    }
 }
 }
 
-
+//NOLINTEND(readability-non-const-parameter*)
 #ifdef __GNUC__
 #pragma GCC diagnostic pop
 #endif
@@ -102,8 +102,8 @@ void UBBD3Q27::run_impl(IBlock * block, IndexVectors::Type type)
 
    uint8_t timestep = pdfs->getTimestep();
    auto & u_y = u_y_;
-    auto & u_x = u_x_;
     auto & u_z = u_z_;
+    auto & u_x = u_x_;
    WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()))
     double * RESTRICT  _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
     const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
diff --git a/src/lbm_generated/boundary/UBBD3Q27.h b/src/lbm_generated/boundary/UBBD3Q27.h
index b7836d6958677e9b221f74f37b014b3de35019c7..9ce7c3ebac5a6bdab50e63a2892e8e3325a17687 100644
--- a/src/lbm_generated/boundary/UBBD3Q27.h
+++ b/src/lbm_generated/boundary/UBBD3Q27.h
@@ -19,6 +19,7 @@
 
 #pragma once
 #include "core/DataTypes.h"
+#include "core/logging/Logging.h"
 
 #include "field/GhostLayerField.h"
 #include "domain_decomposition/BlockDataID.h"
@@ -40,6 +41,10 @@
 #define RESTRICT
 #endif
 
+#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
+using walberla::half;
+#endif
+
 namespace walberla {
 namespace lbm {
 
diff --git a/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h b/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h
index 1b3e43a51dd7e7e8965e2152c58e493f73d8af84..b74539e93204db42304f2fb1b26fb4332ca16c86 100644
--- a/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h
+++ b/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h
@@ -54,6 +54,12 @@ class NonuniformPackingKernelsWrapper
    void localCopyDirection(PdfField_T* srcField, CellInterval& srcInterval, PdfField_T* dstField,
                            CellInterval& dstInterval, Direction dir) const                                    = 0;
 
+   void localCopyRedistribute(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField,
+                              CellInterval dstInterval, Direction dir) const = 0;
+
+   void localPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskField* maskField, CellInterval srcInterval,
+                                PdfField_T* dstField, CellInterval dstInterval, Direction dir) const = 0;
+
    void unpackRedistribute(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer,
                            stencil::Direction dir) const = 0;
 
@@ -110,6 +116,18 @@ class NonuniformPackingKernelsWrapper< PdfField_T, false >
       kernels_.localCopyDirection(srcField, srcInterval, dstField, dstInterval, dir);
    }
 
+   void localCopyRedistribute(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField,
+                              CellInterval dstInterval, Direction dir) const
+   { // Pure pass-through: every argument is handed to kernels_ unchanged and no timestep is supplied.
+      kernels_.localCopyRedistribute(srcField, srcInterval, dstField, dstInterval, dir);
+   }
+
+   void localPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskField* maskField, CellInterval srcInterval,
+                                PdfField_T* dstField, CellInterval dstInterval, Direction dir) const
+   { // Pure pass-through: every argument is handed to kernels_ unchanged and no timestep is supplied.
+      kernels_.localPartialCoalescence(srcField, maskField, srcInterval, dstField, dstInterval, dir);
+   }
+
    void unpackRedistribute(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer,
                            stencil::Direction dir) const
    {
@@ -194,10 +212,33 @@ class NonuniformPackingKernelsWrapper< PdfField_T, true >
       kernels_.localCopyDirection(srcField, srcInterval, dstField, dstInterval, dir, timestep);
    }
 
+   void localCopyRedistribute(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField,
+                              CellInterval dstInterval, Direction dir) const
+   { // Local coarse-to-fine copy: asserts the destination timestep is odd, then forwards to kernels_.
+      uint8_t timestep = srcField->getTimestep(); // the kernel is handed the source field's timestep
+      WALBERLA_ASSERT(!((dstField->getTimestep() & 1) ^ 1), "When the coarse to fine step is executed, the fine Field must "
+                                                            "be on an odd timestep, while the source field could either be "
+                                                            "on an even or an odd state.")
+      kernels_.localCopyRedistribute(srcField, srcInterval, dstField, dstInterval, dir, timestep);
+   }
+
+   void localPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskField* maskField, CellInterval srcInterval,
+                                PdfField_T* dstField, CellInterval dstInterval, Direction dir) const
+   { // Local fine-to-coarse coalescence: asserts the source timestep is even, then forwards to kernels_.
+      uint8_t timestep = dstField->getTimestep(); // the kernel is handed the destination field's timestep
+      WALBERLA_ASSERT((srcField->getTimestep() & 1) ^ 1, "When the fine to coarse step is executed, the fine Field must "
+                                                         "be on an even timestep, while the destination field could either be "
+                                                         "on an even or an odd state.")
+      kernels_.localPartialCoalescence(srcField, maskField, srcInterval, dstField, dstInterval, dir, timestep);
+   }
+
    void unpackRedistribute(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer,
                            stencil::Direction dir) const
    {
       uint8_t timestep = dstField->getTimestep();
+      WALBERLA_ASSERT(!((dstField->getTimestep() & 1) ^ 1), "When the coarse to fine step is executed, the fine Field must "
+                                                            "be on an odd timestep, while the source field could either be "
+                                                            "on an even or an odd state.") // fires unless the destination timestep is odd
       kernels_.unpackRedistribute(dstField, ci, inBuffer, dir, timestep);
    }
 
@@ -205,6 +246,9 @@ class NonuniformPackingKernelsWrapper< PdfField_T, true >
                                unsigned char* outBuffer, Direction dir) const
    {
       uint8_t timestep = srcField->getTimestep();
+      WALBERLA_ASSERT((srcField->getTimestep() & 1) ^ 1, "When the fine to coarse step is executed, the fine Field must "
+                                                         "be on an even timestep, while the source field could either be "
+                                                         "on an even or an odd state.")
       kernels_.packPartialCoalescence(srcField, maskField, ci, outBuffer, dir, timestep);
    }
 
diff --git a/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.impl.h b/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.impl.h
index cf36a61f9813989b5e975e6782f5c3ea138a3e96..7c45d93defcc399d549720c1f35a2b6a332dc248 100644
--- a/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.impl.h
+++ b/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.impl.h
@@ -167,19 +167,19 @@ void NonuniformGeneratedPdfPackInfo< PdfField_T >::communicateLocalCoarseToFine(
       Direction unpackDir      = dstIntervals[index].first;
       CellInterval dstInterval = dstIntervals[index].second;
 
-      uint_t packSize      = kernels_.size(srcInterval);
-
 #ifndef NDEBUG
       Direction const packDir        = srcIntervals[index].first;
       WALBERLA_ASSERT_EQUAL(packDir, stencil::inverseDir[unpackDir])
       uint_t unpackSize = kernels_.redistributeSize(dstInterval);
-      WALBERLA_ASSERT_EQUAL(packSize, unpackSize)
+      WALBERLA_ASSERT_EQUAL(kernels_.size(srcInterval), unpackSize)
 #endif
 
       // TODO: This is a dirty workaround. Code-generate direct redistribution!
-      std::vector< unsigned char > buffer(packSize);
+      std::vector< unsigned char > buffer(kernels_.size(srcInterval));
       kernels_.packAll(srcField, srcInterval, &buffer[0]);
       kernels_.unpackRedistribute(dstField, dstInterval, &buffer[0], unpackDir);
+
+      // kernels_.localCopyRedistribute(srcField, srcInterval, dstField, dstInterval, unpackDir);
    }
 }
 
@@ -228,20 +228,20 @@ void walberla::lbm_generated::NonuniformGeneratedPdfPackInfo< PdfField_T >::comm
 
    CellInterval srcInterval;
    srcField->getGhostRegion(dir, srcInterval, 2);
-   uint_t packSize = kernels_.partialCoalescenceSize(srcInterval, dir);
 
    CellInterval dstInterval = getCoarseBlockCoalescenceInterval(coarseReceiver, fineSender->getId(),
                                                                 invDir, dstField);
 
 #ifndef NDEBUG
    uint_t unpackSize = kernels_.size(dstInterval, invDir);
-   WALBERLA_ASSERT_EQUAL(packSize, unpackSize)
+   WALBERLA_ASSERT_EQUAL(kernels_.partialCoalescenceSize(srcInterval, dir), unpackSize)
 #endif
 
    // TODO: This is a dirty workaround. Code-generate direct redistribution!
-   std::vector< unsigned char > buffer(packSize);
+   std::vector< unsigned char > buffer(kernels_.partialCoalescenceSize(srcInterval, dir));
    kernels_.packPartialCoalescence(srcField, maskField, srcInterval, &buffer[0], dir);
    kernels_.unpackCoalescence(dstField, dstInterval, &buffer[0], invDir);
+   // kernels_.localPartialCoalescence(srcField, maskField, srcInterval, dstField, dstInterval, dir);
 }
 
 template< typename PdfField_T>
diff --git a/src/lbm_generated/evaluation/PerformanceEvaluation.h b/src/lbm_generated/evaluation/PerformanceEvaluation.h
index 36f112ac3360f1125fcc11733c8ab56955daceb0..98134e0db7be9cdb921ea5780e4f6ce6f7623562 100644
--- a/src/lbm_generated/evaluation/PerformanceEvaluation.h
+++ b/src/lbm_generated/evaluation/PerformanceEvaluation.h
@@ -92,6 +92,16 @@ public:
       return c;
    }
 
+   uint64_t numberOfCells() const
+   { // Read-only accessor: exposes the value reported by cells_.numberOfCells().
+      return cells_.numberOfCells();
+   }
+
+   uint64_t numberOfFluidCells() const
+   { // Read-only accessor: exposes the value reported by fluidCells_.numberOfCells().
+      return fluidCells_.numberOfCells();
+   }
+
    double mlups( const uint_t timeSteps, const double time ) const
    {
       double m( 0.0 );
diff --git a/src/lbm_generated/field/AddToStorage.h b/src/lbm_generated/field/AddToStorage.h
index afb86819931238443443f3095f73880aec401d36..95b0089d9f72f27eb2d335c5c3347228183e9f2d 100644
--- a/src/lbm_generated/field/AddToStorage.h
+++ b/src/lbm_generated/field/AddToStorage.h
@@ -41,7 +41,7 @@ public:
    using Base_T = field::BlockDataHandling<PdfField_T, LatticeStorageSpecification_T::Stencil::D == 2>;
 
    PdfFieldHandling( const weak_ptr< StructuredBlockStorage > & blocks, const LatticeStorageSpecification_T & storageSpecification,
-                     const uint_t nrOfGhostLayers, const field::Layout & layout, const shared_ptr< field::FieldAllocator<real_t> > alloc = nullptr ) :
+                     const uint_t nrOfGhostLayers, const field::Layout & layout, const shared_ptr< field::FieldAllocator<typename LatticeStorageSpecification_T::value_type> > alloc = nullptr ) :
       blocks_( blocks ), storageSpecification_( storageSpecification ),
       nrOfGhostLayers_( nrOfGhostLayers ), layout_( layout ), alloc_( alloc ){}
 
@@ -106,7 +106,7 @@ private:
 
    uint_t            nrOfGhostLayers_;
    field::Layout     layout_;
-   shared_ptr< field::FieldAllocator<real_t> > alloc_;
+   shared_ptr< field::FieldAllocator<typename LatticeStorageSpecification_T::value_type> > alloc_;
 
 }; // class PdfFieldHandling
 
@@ -121,10 +121,10 @@ BlockDataID addPdfFieldToStorage( const shared_ptr< BlockStorage_T > & blocks, c
                                   const field::Layout & layout = field::fzyx,
                                   const Set<SUID> & requiredSelectors     = Set<SUID>::emptySet(),
                                   const Set<SUID> & incompatibleSelectors = Set<SUID>::emptySet(),
-                                  const shared_ptr< field::FieldAllocator<real_t> > alloc = nullptr)
+                                  const shared_ptr< field::FieldAllocator<typename LatticeStorageSpecification_T::value_type> > alloc = nullptr)
 {
    return blocks->addBlockData( make_shared< internal::PdfFieldHandling< LatticeStorageSpecification_T > >(
-                                   blocks, storageSpecification, ghostLayers, layout, alloc ),
+                                blocks, storageSpecification, ghostLayers, layout, alloc ),
                                 identifier, requiredSelectors, incompatibleSelectors );
 }
 
@@ -134,7 +134,7 @@ BlockDataID addPdfFieldToStorage( const shared_ptr< BlockStorage_T > & blocks, c
                                  const field::Layout & layout = field::fzyx,
                                  const Set<SUID> & requiredSelectors     = Set<SUID>::emptySet(),
                                  const Set<SUID> & incompatibleSelectors = Set<SUID>::emptySet(),
-                                 const shared_ptr< field::FieldAllocator<real_t> > alloc = nullptr)
+                                 const shared_ptr< field::FieldAllocator<typename LatticeStorageSpecification_T::value_type> > alloc = nullptr)
 {
    auto ghostLayers = uint_c(1);
 
@@ -148,7 +148,7 @@ BlockDataID addPdfFieldToStorage( const shared_ptr< BlockStorage_T > & blocks, c
                                  const LatticeStorageSpecification_T & storageSpecification,
                                  const Set<SUID> & requiredSelectors     = Set<SUID>::emptySet(),
                                  const Set<SUID> & incompatibleSelectors = Set<SUID>::emptySet(),
-                                 const shared_ptr< field::FieldAllocator<real_t> > alloc = nullptr)
+                                 const shared_ptr< field::FieldAllocator<typename LatticeStorageSpecification_T::value_type> > alloc = nullptr)
 {
    auto ghostLayers = uint_c(1);
    auto layout = field::fzyx;
@@ -161,7 +161,7 @@ BlockDataID addPdfFieldToStorage( const shared_ptr< BlockStorage_T > & blocks, c
 template< typename LatticeStorageSpecification_T, typename BlockStorage_T >
 BlockDataID addPdfFieldToStorage( const shared_ptr< BlockStorage_T > & blocks, const std::string & identifier,
                                  const LatticeStorageSpecification_T & storageSpecification,
-                                 const shared_ptr< field::FieldAllocator<real_t> > alloc = nullptr)
+                                 const shared_ptr< field::FieldAllocator<typename LatticeStorageSpecification_T::value_type> > alloc = nullptr)
 {
    auto ghostLayers = uint_c(1);
    auto layout = field::fzyx;
@@ -177,7 +177,7 @@ template< typename LatticeStorageSpecification_T, typename BlockStorage_T >
 BlockDataID addPdfFieldToStorage( const shared_ptr< BlockStorage_T > & blocks, const std::string & identifier,
                                  const LatticeStorageSpecification_T & storageSpecification,
                                  const field::Layout & layout = field::fzyx,
-                                 const shared_ptr< field::FieldAllocator<real_t> > alloc = nullptr)
+                                 const shared_ptr< field::FieldAllocator<typename LatticeStorageSpecification_T::value_type> > alloc = nullptr)
 {
    auto ghostLayers = uint_c(1);
    auto requiredSelectors = Set<SUID>::emptySet();
@@ -193,7 +193,7 @@ BlockDataID addPdfFieldToStorage( const shared_ptr< BlockStorage_T > & blocks, c
                                  const LatticeStorageSpecification_T & storageSpecification,
                                  const uint_t ghostLayers,
                                  const field::Layout & layout,
-                                 const shared_ptr< field::FieldAllocator<real_t> > alloc)
+                                 const shared_ptr< field::FieldAllocator<typename LatticeStorageSpecification_T::value_type> > alloc)
 {
    auto requiredSelectors = Set<SUID>::emptySet();
    auto incompatibleSelectors = Set<SUID>::emptySet();
diff --git a/src/lbm_generated/field/PdfField.h b/src/lbm_generated/field/PdfField.h
index 6e6b7ee88fd5e9ee0be1dbfb46da6d6e524d5536..2dce8976a10f80a35471baa7be5e7fbdde73a9fc 100644
--- a/src/lbm_generated/field/PdfField.h
+++ b/src/lbm_generated/field/PdfField.h
@@ -28,7 +28,7 @@
 namespace walberla::lbm_generated {
 
 template< typename LatticeStorageSpecification_T >
-class PdfField : public GhostLayerField< real_t, LatticeStorageSpecification_T::Stencil::Size >
+class PdfField : public GhostLayerField< typename LatticeStorageSpecification_T::value_type, LatticeStorageSpecification_T::Stencil::Size >
 {
 public:
 
@@ -38,17 +38,17 @@ public:
    using LatticeStorageSpecification = LatticeStorageSpecification_T;
    using Stencil = typename LatticeStorageSpecification_T::Stencil;
 
-   using value_type = typename GhostLayerField<real_t, Stencil::Size>::value_type;
+   using value_type = typename LatticeStorageSpecification_T::value_type;
 
-   using Ptr = typename GhostLayerField<real_t, Stencil::Size>::Ptr;
-   using ConstPtr = typename GhostLayerField<real_t, Stencil::Size>::ConstPtr;
+   using Ptr = typename GhostLayerField<value_type, Stencil::Size>::Ptr;
+   using ConstPtr = typename GhostLayerField<value_type, Stencil::Size>::ConstPtr;
    //@}
    //*******************************************************************************************************************
 
    PdfField( const uint_t _xSize, const uint_t _ySize, const uint_t _zSize,
             const LatticeStorageSpecification_T & storageSpecification,
              const uint_t ghostLayers = uint_t(1), const field::Layout & _layout = field::zyxf,
-             const shared_ptr< field::FieldAllocator<real_t> > & alloc = shared_ptr< field::FieldAllocator<real_t> >() );
+             const shared_ptr< field::FieldAllocator<value_type> > & alloc = shared_ptr< field::FieldAllocator<value_type> >() );
 
    ~PdfField() override = default;
 
@@ -61,19 +61,19 @@ public:
    // Access functions (with stencil::Direction!) //
    /////////////////////////////////////////////////
 
-   using GhostLayerField< real_t, Stencil::Size >::get;
+   using GhostLayerField< value_type, Stencil::Size >::get;
 
-         real_t & get( cell_idx_t x, cell_idx_t y, cell_idx_t z, stencil::Direction d )       { return get( x, y, z, Stencil::idx[d] ); }
-   const real_t & get( cell_idx_t x, cell_idx_t y, cell_idx_t z, stencil::Direction d ) const { return get( x, y, z, Stencil::idx[d] ); }
-         real_t & get( const Cell & c, stencil::Direction d )       { return get( c.x(), c.y(), c.z(), Stencil::idx[d] ); }
-   const real_t & get( const Cell & c, stencil::Direction d ) const { return get( c.x(), c.y(), c.z(), Stencil::idx[d] ); }
+         value_type & get( cell_idx_t x, cell_idx_t y, cell_idx_t z, stencil::Direction d )       { return get( x, y, z, Stencil::idx[d] ); }
+   const value_type & get( cell_idx_t x, cell_idx_t y, cell_idx_t z, stencil::Direction d ) const { return get( x, y, z, Stencil::idx[d] ); }
+         value_type & get( const Cell & c, stencil::Direction d )       { return get( c.x(), c.y(), c.z(), Stencil::idx[d] ); }
+   const value_type & get( const Cell & c, stencil::Direction d ) const { return get( c.x(), c.y(), c.z(), Stencil::idx[d] ); }
 
-   using GhostLayerField< real_t, Stencil::Size >::operator();
+   using GhostLayerField< value_type, Stencil::Size >::operator();
 
-         real_t & operator()( cell_idx_t x, cell_idx_t y, cell_idx_t z, stencil::Direction d )       { return get( x, y, z, Stencil::idx[d] ); }
-   const real_t & operator()( cell_idx_t x, cell_idx_t y, cell_idx_t z, stencil::Direction d ) const { return get( x, y, z, Stencil::idx[d] ); }
-         real_t & operator()( const Cell & c, stencil::Direction d )       { return get( c.x(), c.y(), c.z(), Stencil::idx[d] ); }
-   const real_t & operator()( const Cell & c, stencil::Direction d ) const { return get( c.x(), c.y(), c.z(), Stencil::idx[d] ); }
+         value_type & operator()( cell_idx_t x, cell_idx_t y, cell_idx_t z, stencil::Direction d )       { return get( x, y, z, Stencil::idx[d] ); }
+   const value_type & operator()( cell_idx_t x, cell_idx_t y, cell_idx_t z, stencil::Direction d ) const { return get( x, y, z, Stencil::idx[d] ); }
+         value_type & operator()( const Cell & c, stencil::Direction d )       { return get( c.x(), c.y(), c.z(), Stencil::idx[d] ); }
+   const value_type & operator()( const Cell & c, stencil::Direction d ) const { return get( c.x(), c.y(), c.z(), Stencil::idx[d] ); }
 
 
 protected:
@@ -81,7 +81,7 @@ protected:
    /*! \name Shallow Copy */
    //@{
    inline PdfField( const PdfField< LatticeStorageSpecification_T > & other );
-   Field< real_t, Stencil::Size > * cloneShallowCopyInternal() const override { return new PdfField< LatticeStorageSpecification_T >( *this ); }
+   Field< value_type, Stencil::Size > * cloneShallowCopyInternal() const override { return new PdfField< LatticeStorageSpecification_T >( *this ); }
    //@}
    //*******************************************************************************************************************
 
@@ -94,17 +94,17 @@ template< typename LatticeStorageSpecification_T >
 PdfField< LatticeStorageSpecification_T >::PdfField( const uint_t _xSize, const uint_t _ySize, const uint_t _zSize,
                                                     const LatticeStorageSpecification_T & storageSpecification,
                                       const uint_t ghostLayers, const field::Layout & _layout,
-                                      const shared_ptr< field::FieldAllocator<real_t> > & alloc ) :
+                                      const shared_ptr< field::FieldAllocator<value_type> > & alloc ) :
 
-   GhostLayerField< real_t, Stencil::Size >( _xSize, _ySize, _zSize, ghostLayers, _layout, alloc ),
+   GhostLayerField< value_type, Stencil::Size >( _xSize, _ySize, _zSize, ghostLayers, _layout, alloc ),
       storageSpecification_( storageSpecification )
 
 {
 #ifdef _OPENMP
    // take care of proper thread<->memory assignment (first-touch allocation policy !)
-   this->setWithGhostLayer( real_t(0) );
+   this->setWithGhostLayer( value_type(0) );
 #endif
-   this->setWithGhostLayer( real_t(0) );
+   this->setWithGhostLayer( value_type(0) );
 }
 
 
@@ -112,24 +112,24 @@ PdfField< LatticeStorageSpecification_T >::PdfField( const uint_t _xSize, const
 template< typename LatticeStorageSpecification_T >
 inline PdfField< LatticeStorageSpecification_T > * PdfField< LatticeStorageSpecification_T >::clone() const
 {
-   return dynamic_cast< PdfField * >( GhostLayerField< real_t, Stencil::Size >::clone() );
+   return dynamic_cast< PdfField * >( GhostLayerField< value_type, Stencil::Size >::clone() );
 }
 
 template< typename LatticeStorageSpecification_T >
 inline PdfField< LatticeStorageSpecification_T > * PdfField< LatticeStorageSpecification_T >::cloneUninitialized() const
 {
-   return dynamic_cast< PdfField * >( GhostLayerField< real_t, Stencil::Size >::cloneUninitialized() );
+   return dynamic_cast< PdfField * >( GhostLayerField< value_type, Stencil::Size >::cloneUninitialized() );
 }
 
 template< typename LatticeStorageSpecification_T >
 inline PdfField< LatticeStorageSpecification_T > * PdfField< LatticeStorageSpecification_T >::cloneShallowCopy() const
 {
-   return dynamic_cast< PdfField * >( GhostLayerField< real_t, Stencil::Size >::cloneShallowCopy() );
+   return dynamic_cast< PdfField * >( GhostLayerField< value_type, Stencil::Size >::cloneShallowCopy() );
 }
 
 template< typename LatticeStorageSpecification_T >
 inline PdfField< LatticeStorageSpecification_T >::PdfField( const PdfField< LatticeStorageSpecification_T > & other )
-   : GhostLayerField< real_t, Stencil::Size >::GhostLayerField( other )
+   : GhostLayerField< value_type, Stencil::Size >::GhostLayerField( other )
 {
 }
 
diff --git a/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.h b/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.h
index 8d95855e61fd4238c2c0f201024f87abe7111107..ff963ed5bf9f93373137635d7b81ff9255c3e0a4 100644
--- a/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.h
+++ b/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.h
@@ -33,6 +33,7 @@ namespace walberla
 {
 
 using gpu::communication::NonUniformGPUScheme;
+using BlockFunction = std::function<void (const uint_t)>; // parameters: level
 
 namespace lbm_generated
 {
@@ -67,26 +68,53 @@ class BasicRecursiveTimeStepGPU
                          "Template parameter PdfField_T is of different type than BlockDataID pdfFieldId that is "
                          "provided as constructor argument")
 #endif
+      useStreams_ = false;
       maxLevel_ = sbfs->getDepth();
+      streams_.resize(maxLevel_ + 1);
+      timestepPerLevel_.resize(maxLevel_ + 1);
 
-      for (uint_t level = 0; level <= maxLevel_; level++)
-      {
+      for (uint_t level = 0; level <= maxLevel_; level++){
          std::vector< Block* > blocks;
          sbfs->getBlocks(blocks, level);
          blocks_.push_back(blocks);
+         streams_[level].resize(nStreams_);
+         timestepPerLevel_[level] = uint8_c(0);
+      }
+      for (uint_t level = 0; level <= maxLevel_; level++){
+         for (uint_t i = 0; i < nStreams_; i++){
+            streams_[level][i] = nullptr;
+         }
       }
    };
 
-   ~BasicRecursiveTimeStepGPU() = default;
+   /// Destroys the per-level GPU streams; they are only created once activateStreams() has set useStreams_.
+   ~BasicRecursiveTimeStepGPU(){
+      if(!useStreams_) return; // streams_ still holds the nullptr placeholders written by the constructor
+      for (uint_t lvl = 0; lvl <= maxLevel_; ++lvl){
+         for (uint_t s = 0; s < nStreams_; ++s)
+            WALBERLA_GPU_CHECK(gpuStreamDestroy(streams_[lvl][s]))
+      }
+   }
+
+   void activateStreams(){ // creates nStreams_ GPU streams per refinement level; useStreams_ makes the destructor release them
+      if(useStreams_) return; // already active: creating again would overwrite, and thereby leak, the existing handles
+      WALBERLA_LOG_INFO_ON_ROOT("Updating blocks using " << nStreams_ << " GPU Streams")
+      for (auto& levelStreams : streams_)
+         for (auto& stream : levelStreams)
+            WALBERLA_GPU_CHECK(gpuStreamCreate(&stream))
+      useStreams_ = true;
+   }
+
 
    void operator()() { timestep(0); };
    void addRefinementToTimeLoop(SweepTimeloop& timeloop, uint_t level = 0);
-   void test(uint_t maxLevel, uint_t level = 0);
+   void addPostBoundaryHandlingBlockFunction( const BlockFunction & function );
 
  private:
    void timestep(uint_t level);
    void ghostLayerPropagation(Block* block, gpuStream_t gpuStream);
    std::function< void() > executeStreamCollideOnLevel(uint_t level, bool withGhostLayerPropagation = false);
+   std::function< void() > executePostBoundaryBlockFunctions(uint_t level);
 
    std::function< void() > executeBoundaryHandlingOnLevel(uint_t level);
 
@@ -100,6 +128,12 @@ class BasicRecursiveTimeStepGPU
 
    SweepCollection_T& sweepCollection_;
    BoundaryCollection_T& boundaryCollection_;
+   std::vector< BlockFunction >  globalPostBoundaryHandlingBlockFunctions_;
+
+   std::vector< std::vector< gpuStream_t >> streams_;
+   uint_t nStreams_{uint_c(6)};
+   bool useStreams_;
+   std::vector< uint8_t > timestepPerLevel_;
 };
 
 } // namespace lbm_generated
diff --git a/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.impl.h b/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.impl.h
index 6665cf3513df16ca09e7c0cfc7c66c35639b1869..0327b9b52422c7b45c5bba8bc6e4ea490fbe88e2 100644
--- a/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.impl.h
+++ b/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.impl.h
@@ -28,18 +28,13 @@ namespace lbm_generated {
 template< typename PdfField_T, typename SweepCollection_T, typename BoundaryCollection_T >
 void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollection_T >::timestep(uint_t level)
 {
-   std::vector<Block *> blocks;
-   sbfs_->getBlocks(blocks, level);
-
-   uint_t maxLevel = sbfs_->getDepth();
-
    // 1.1 Collision
-   for(auto b: blocks){
+   for(auto b: blocks_[level]){
       sweepCollection_.streamCollide(b);
    }
 
    // 1.2 Recursive Descent
-   if(level < maxLevel){
+   if(level < maxLevel_){
       timestep(level + 1);
    }
 
@@ -52,13 +47,13 @@ void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollectio
    commScheme_->communicateEqualLevel(level);
 
    // 1.5 Boundary Handling and Coalescence Preparation
-   for(auto b : blocks){
+   for(auto b : blocks_[level]){
       boundaryCollection_(b, nullptr);
-      if(level != maxLevel) pdfFieldPackInfo_->prepareCoalescence(b);
+      if(level != maxLevel_) pdfFieldPackInfo_->prepareCoalescence(b);
    }
 
    // 1.6 Fine to Coarse Communication, receiving end
-   if(level < maxLevel){
+   if(level < maxLevel_){
       commScheme_->communicateFineToCoarse(level + 1);
    }
 
@@ -67,13 +62,13 @@ void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollectio
    if(level == 0) return;
 
    // 2.1 Collision and Ghost-Layer Propagation
-   for(auto b: blocks){
+   for(auto b: blocks_[level]){
       ghostLayerPropagation(b);  // GL-Propagation first without swapping arrays...
       sweepCollection_.streamCollide(b);                // then Stream-Collide on interior, and swap arrays
    }
 
    // 2.2 Recursive Descent
-   if(level < maxLevel){
+   if(level < maxLevel_){
       timestep(level + 1);
    }
 
@@ -81,13 +76,13 @@ void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollectio
    commScheme_->communicateEqualLevel(level);
 
    // 2.5 Boundary Handling and Coalescence Preparation
-   for(auto b : blocks){
+   for(auto b : blocks_[level]){
       boundaryCollection_(b, nullptr);
-      if(level != maxLevel) pdfFieldPackInfo_->prepareCoalescence(b);
+      if(level != maxLevel_) pdfFieldPackInfo_->prepareCoalescence(b);
    }
 
    // 2.6 Fine to Coarse Communication, receiving end
-   if(level < maxLevel){
+   if(level < maxLevel_){
       commScheme_->communicateFineToCoarse(level + 1);
    }
 }
@@ -115,6 +110,7 @@ void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollectio
 
    // 1.5 Boundary Handling and Coalescence Preparation
    timeloop.addFuncBeforeTimeStep(executeBoundaryHandlingOnLevel(level), "Refinement Cycle: boundary handling on level " + std::to_string(level));
+   timeloop.addFuncBeforeTimeStep(executePostBoundaryBlockFunctions(level), "Refinement Cycle: post boundary handling block functions on level " + std::to_string(level));
 
    // 1.6 Fine to Coarse Communication, receiving end
    if(level < maxLevel_){
@@ -138,6 +134,7 @@ void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollectio
 
    // 2.5 Boundary Handling and Coalescence Preparation
    timeloop.addFuncBeforeTimeStep(executeBoundaryHandlingOnLevel(level), "Refinement Cycle: boundary handling on level " + std::to_string(level));
+   timeloop.addFuncBeforeTimeStep(executePostBoundaryBlockFunctions(level), "Refinement Cycle: post boundary handling block functions on level " + std::to_string(level));
 
    // 2.6 Fine to Coarse Communication, receiving end
    if(level < maxLevel_)
@@ -145,91 +142,65 @@ void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollectio
 
 }
 
-template< typename PdfField_T, typename SweepCollection_T, typename BoundaryCollection_T >
-void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollection_T >::test(uint_t maxLevel, uint_t level)
-{
-   // 1.1 Collision
-   WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: streamCollide on level " + std::to_string(level));
-
-   // 1.2 Recursive Descent
-   if(level < maxLevel){
-      test(maxLevel, level + 1);
-   }
-
-   // 1.3 Coarse to Fine Communication, receiving end
-   if(level != 0){
-      WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: communicate coarse to fine on level " + std::to_string(level));
-   }
-
-   // 1.4 Equal-Level Communication
-   WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: communicate equal level on level " + std::to_string(level));
-
-
-   // 1.5 Boundary Handling and Coalescence Preparation
-   WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: boundary handling on level " + std::to_string(level));
-
-   // 1.6 Fine to Coarse Communication, receiving end
-   if(level < maxLevel){
-      WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: communicate fine to coarse on level " + std::to_string(level + 1));
-   }
-
-   // Stop here if on coarsest level.
-   // Otherwise, continue to second subcycle.
-   if(level == 0) return;
-
-   // 2.1 Collision and Ghost-Layer Propagation
-   WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: streamCollide with ghost layer propagation on level " + std::to_string(level));
-
-   // 2.2 Recursive Descent
-   if(level < maxLevel)
-      test(maxLevel, level + 1);
-
-
-   // 2.4 Equal-Level Communication
-   WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: communicate equal level on level " + std::to_string(level));
-
-   // 2.5 Boundary Handling and Coalescence Preparation
-   WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: boundary handling on level " + std::to_string(level));
-
-   // 2.6 Fine to Coarse Communication, receiving end
-   if(level < maxLevel)
-      WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: communicate fine to coarse on level " + std::to_string(level + 1));
-
-}
-
 
 template< typename PdfField_T, typename SweepCollection_T, typename BoundaryCollection_T >
 std::function<void()> BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollection_T >::executeStreamCollideOnLevel(uint_t level, bool withGhostLayerPropagation)
 {
-   return [level, withGhostLayerPropagation, this]()
-   {
-      if (withGhostLayerPropagation)
-      {
-         for(auto b: blocks_[level]){
-            ghostLayerPropagation(b, nullptr);
-            sweepCollection_.streamCollide(b, 0, nullptr);
+   // Returns the functor that runs stream-collide on all blocks of `level`; when
+   // withGhostLayerPropagation is set, ghost-layer propagation is issued ahead of the sweep.
+   // The execution path is chosen once, from sweepCollection_.blockWise():
+   //  * block-wise: single calls cover the whole level; the level's timestep parity is kept in
+   //    timestepPerLevel_ and handed to the sweeps and to the communication scheme.
+   //  * per-block: each block is processed on streams_[level][blockIndex % nStreams_]
+   //    (those handles are nullptr placeholders until activateStreams() has been called).
+   if(sweepCollection_.blockWise()){
+      return [level, withGhostLayerPropagation, this](){
+         if (withGhostLayerPropagation){
+            // the propagation is launched with the parity of the step that is about to start
+            const uint8_t upcomingParity = (timestepPerLevel_[level] + 1) & 1;
+            sweepCollection_.ghostLayerPropagation(level, upcomingParity);
+            WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
+            WALBERLA_GPU_CHECK(gpuPeekAtLastError())
          }
-      }
-      else
-      {
-         for(auto b: blocks_[level]){
-            sweepCollection_.streamCollide(b, 0, nullptr);
+         // Tail shared by both variants: toggle the level's parity, run the sweep over the
+         // whole level and advance the timestep stored in every PDF field of the level.
+         timestepPerLevel_[level] = (timestepPerLevel_[level] + 1) & 1;
+         sweepCollection_.streamCollideOverBlocks(level, timestepPerLevel_[level]);
+         for (uint_t i = 0; i < blocks_[level].size(); i++){
+            auto pdfField = blocks_[level][i]->getData< PdfField_T >(pdfFieldId_);
+            pdfField->advanceTimestep();
          }
-      }
-      WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
-   };
+         commScheme_->setTimestepForLevel(level, timestepPerLevel_[level]);
+         WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
+         WALBERLA_GPU_CHECK(gpuPeekAtLastError())
+      };
+   }
+
+   // Per-block path.
+   // NOTE(review): unlike the block-wise path this one issues no gpuDeviceSynchronize();
+   // confirm that the subsequent communication/boundary steps synchronise as needed.
+   return [level, withGhostLayerPropagation, this](){
+      for (uint_t i = 0; i < blocks_[level].size(); i++){
+         Block* const block = blocks_[level][i];
+         const gpuStream_t stream = streams_[level][i % nStreams_];
+
+         // GL-propagation first, then stream-collide, both issued on the block's stream
+         if (withGhostLayerPropagation){
+            ghostLayerPropagation(block, stream);
+         }
+         sweepCollection_.streamCollide(block, 0, stream);
+      }
+   };
 }
 
 template< typename PdfField_T, typename SweepCollection_T, typename BoundaryCollection_T >
 std::function<void()> BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollection_T >::executeBoundaryHandlingOnLevel(uint_t level)
 {
    return [this, level]() {
-      for (auto b : blocks_[level])
-      {
-         boundaryCollection_(b, nullptr);
-         if (level != maxLevel_) pdfFieldPackInfo_->prepareCoalescence(b, nullptr);
+      for (uint_t i = 0; i < blocks_[level].size(); i++){ // index loop: i also selects the stream, streams_[level][i % nStreams_]
+         boundaryCollection_(blocks_[level][i], streams_[level][i % nStreams_]); // boundary handling for block i on its stream
+         if (level != maxLevel_) pdfFieldPackInfo_->prepareCoalescence(blocks_[level][i], streams_[level][i % nStreams_]); // skipped on the finest level; NOTE(review): the former trailing gpuDeviceSynchronize() is gone - confirm consumers synchronise
       }
-      WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
    };
 }
 
@@ -251,5 +222,23 @@ void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollectio
    }
 }
 
+// Returns a functor that calls every registered post-boundary-handling callback with `level`, in registration order.
+template< typename PdfField_T, typename SweepCollection_T, typename BoundaryCollection_T >
+std::function<void()> BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollection_T >::executePostBoundaryBlockFunctions(uint_t level)
+{
+   auto runCallbacks = [this, level]() {
+      for( const auto& callback : globalPostBoundaryHandlingBlockFunctions_ ) callback(level);
+   };
+   return runCallbacks;
+}
+
+
+// Appends `function` to the callbacks that the functor from executePostBoundaryBlockFunctions() invokes.
+template< typename PdfField_T, typename SweepCollection_T, typename BoundaryCollection_T >
+inline void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollection_T >::addPostBoundaryHandlingBlockFunction( const BlockFunction & function ) {
+   globalPostBoundaryHandlingBlockFunctions_.push_back( function );
+}
+
+
 } // namespace lbm_generated
 } // namespace walberla
diff --git a/src/lbm_generated/gpu/GPUPdfField.h b/src/lbm_generated/gpu/GPUPdfField.h
index 1a9f59a116b8c4e7c5fcb4ebd817dcb5cad0a908..f67a84fef56414271d6a70e81e36e37799c024b0 100644
--- a/src/lbm_generated/gpu/GPUPdfField.h
+++ b/src/lbm_generated/gpu/GPUPdfField.h
@@ -28,7 +28,7 @@ using namespace walberla::gpu;
 namespace walberla::lbm_generated {
 
 template< typename LatticeStorageSpecification_T >
-class GPUPdfField : public GPUField< real_t >
+class GPUPdfField : public GPUField< typename LatticeStorageSpecification_T::value_type >
 {
  public:
 
@@ -38,7 +38,7 @@ class GPUPdfField : public GPUField< real_t >
    using LatticeStorageSpecification = LatticeStorageSpecification_T;
    using Stencil = typename LatticeStorageSpecification_T::Stencil;
 
-   using value_type = typename GPUField<real_t>::value_type;
+   using value_type = typename LatticeStorageSpecification_T::value_type;
    //@}
    //*******************************************************************************************************************
 
@@ -59,7 +59,7 @@ template< typename LatticeStorageSpecification_T >
 GPUPdfField< LatticeStorageSpecification_T >::GPUPdfField( uint_t _xSize, uint_t _ySize, uint_t _zSize,
                                                           const LatticeStorageSpecification_T & storageSpecification,
                                                           uint_t ghostLayers, const Layout & layout, bool usePitchedMem) :
-                    GPUField< real_t>( _xSize, _ySize, _zSize, LatticeStorageSpecification_T::Stencil::Size, ghostLayers, layout, usePitchedMem ), storageSpecification_( storageSpecification )
+                    GPUField<value_type>( _xSize, _ySize, _zSize, LatticeStorageSpecification_T::Stencil::Size, ghostLayers, layout, usePitchedMem ), storageSpecification_( storageSpecification )
 {
 }
 
diff --git a/src/lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.h b/src/lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.h
index 585d1db348cbf0e1b5572f563b48ff55a717e9ec..a780bf48205df0847e9a2b85ff8efd0e97a396f0 100644
--- a/src/lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.h
+++ b/src/lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.h
@@ -42,6 +42,7 @@ template< typename PdfField_T, bool inplace >
 class NonuniformGPUPackingKernelsWrapper
 {
  public:
+   using value_type = typename PdfField_T::value_type;
    void packAll(PdfField_T* srcField, CellInterval ci, unsigned char* outBuffer, gpuStream_t stream ) const  = 0;
    void unpackAll(PdfField_T* dstField, CellInterval ci, unsigned char* inBuffer, gpuStream_t stream ) const = 0;
    void localCopyAll(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField,
@@ -49,8 +50,15 @@ class NonuniformGPUPackingKernelsWrapper
 
    void packDirection(PdfField_T* srcField, CellInterval ci, unsigned char* outBuffer, Direction dir, gpuStream_t stream ) const  = 0;
    void unpackDirection(PdfField_T* dstField, CellInterval ci, unsigned char* inBuffer, Direction dir, gpuStream_t stream ) const = 0;
-   void localCopyDirection(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField,
-                           CellInterval dstInterval, Direction dir, gpuStream_t stream) const               = 0;
+   void localCopyDirection(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField, CellInterval dstInterval, Direction dir, gpuStream_t stream) const = 0;
+   void blockLocalCopyDirection(value_type** data_pdfs_src_dp, value_type** data_pdfs_dst_dp, Direction dir, uint8_t timestep, gpuStream_t stream, std::array<int64_t, 4>& sizes, std::array<int64_t, 4>& strides) const = 0;
+
+
+   void localCopyRedistribute(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField,
+                              CellInterval dstInterval, Direction dir, gpuStream_t stream) const = 0;
+
+   void localPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskFieldGPU* maskField, CellInterval srcInterval,
+                                PdfField_T* dstField, CellInterval dstInterval, Direction dir, gpuStream_t stream) const = 0;
 
    void unpackRedistribute(PdfField_T* dstField, CellInterval ci, unsigned char* inBuffer,
                            stencil::Direction dir, gpuStream_t stream ) const = 0;
@@ -64,6 +72,8 @@ class NonuniformGPUPackingKernelsWrapper
    uint_t size(CellInterval ci) const                                  = 0;
    uint_t redistributeSize(CellInterval ci) const                      = 0;
    uint_t partialCoalescenceSize(CellInterval ci, Direction dir) const = 0;
+
+   bool blockWise() const = 0;
 };
 
 /*
@@ -75,6 +85,7 @@ class NonuniformGPUPackingKernelsWrapper< PdfField_T, false >
  public:
    using LatticeStorageSpecification_T = typename PdfField_T::LatticeStorageSpecification;
    using PackingKernels_T              = typename LatticeStorageSpecification_T::PackKernels;
+   using value_type                    = typename PdfField_T::value_type;
 
    void packAll(PdfField_T* srcField, CellInterval ci, unsigned char* outBuffer, gpuStream_t stream = nullptr) const
    {
@@ -108,6 +119,23 @@ class NonuniformGPUPackingKernelsWrapper< PdfField_T, false >
       kernels_.localCopyDirection(srcField, srcInterval, dstField, dstInterval, dir, stream);
    }
 
+   void blockLocalCopyDirection(value_type** data_pdfs_src_dp, value_type** data_pdfs_dst_dp, Direction dir, uint8_t /*timestep*/, gpuStream_t stream, std::array<int64_t, 4>& sizes, std::array<int64_t, 4>& strides) const
+   { // the timestep parameter is unnamed and not forwarded in this (inplace == false) specialization
+      kernels_.localCopyDirection(data_pdfs_src_dp, data_pdfs_dst_dp, dir, stream, sizes, strides);
+   }
+
+   void localCopyRedistribute(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField,
+                              CellInterval dstInterval, Direction dir, gpuStream_t stream) const
+   { // plain forwarder to the packing kernels; no timestep argument is passed in this (inplace == false) specialization
+      kernels_.localCopyRedistribute(srcField, srcInterval, dstField, dstInterval, dir, stream);
+   }
+
+   void localPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskFieldGPU* maskField, CellInterval srcInterval,
+                                PdfField_T* dstField, CellInterval dstInterval, Direction dir, gpuStream_t stream) const
+   { // plain forwarder to the packing kernels; no timestep argument is passed in this (inplace == false) specialization
+      kernels_.localPartialCoalescence(srcField, maskField, srcInterval, dstField, dstInterval, dir, stream);
+   }
+
    void unpackRedistribute(PdfField_T* dstField, CellInterval ci, unsigned char* inBuffer,
                            stencil::Direction dir, gpuStream_t stream = nullptr) const
    {
@@ -138,6 +166,8 @@ class NonuniformGPUPackingKernelsWrapper< PdfField_T, false >
       return kernels_.partialCoalescenceSize(ci, dir);
    }
 
+   bool blockWise() const {return kernels_.blockWise;}
+
  private:
    PackingKernels_T kernels_;
 };
@@ -151,6 +181,7 @@ class NonuniformGPUPackingKernelsWrapper< PdfField_T, true >
  public:
    using LatticeStorageSpecification_T = typename PdfField_T::LatticeStorageSpecification;
    using PackingKernels_T              = typename LatticeStorageSpecification_T::PackKernels;
+   using value_type                    = typename PdfField_T::value_type;
 
    void packAll(PdfField_T* srcField, CellInterval ci, unsigned char* outBuffer, gpuStream_t stream = nullptr) const
    {
@@ -192,10 +223,39 @@ class NonuniformGPUPackingKernelsWrapper< PdfField_T, true >
       kernels_.localCopyDirection(srcField, srcInterval, dstField, dstInterval, dir, timestep, stream);
    }
 
+   void blockLocalCopyDirection(value_type** data_pdfs_src_dp, value_type** data_pdfs_dst_dp, Direction dir, uint8_t timestep, gpuStream_t stream, std::array<int64_t, 4>& sizes, std::array<int64_t, 4>& strides) const
+   { // inplace == true specialization: the caller-supplied timestep is forwarded to the kernel (no field is queried here)
+      kernels_.localCopyDirection(data_pdfs_src_dp, data_pdfs_dst_dp, dir, timestep, stream, sizes, strides);
+   }
+
+
+   void localCopyRedistribute(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField,
+                              CellInterval dstInterval, Direction dir, gpuStream_t stream) const
+   { // coarse-to-fine local copy for in-place fields: the kernel is parameterised with the source field's timestep
+      uint8_t timestep = srcField->getTimestep();
+      WALBERLA_ASSERT(!((dstField->getTimestep() & 1) ^ 1), "When the coarse to fine step is executed, the fine Field must "
+                                                            "be on an odd timestep, while the source field could either be "
+                                                            "on an even or an odd state.") // holds iff the lowest bit of the destination timestep is set
+      kernels_.localCopyRedistribute(srcField, srcInterval, dstField, dstInterval, dir, timestep, stream);
+   }
+
+   void localPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskFieldGPU* maskField, CellInterval srcInterval,
+                                PdfField_T* dstField, CellInterval dstInterval, Direction dir, gpuStream_t stream) const
+   { // fine-to-coarse local copy: srcField is the fine field; the kernel is parameterised with the destination's timestep
+      uint8_t timestep = dstField->getTimestep();
+      WALBERLA_ASSERT((srcField->getTimestep() & 1) ^ 1, "When the fine to coarse step is executed, the fine Field must "
+                                                         "be on an even timestep, while the destination field could either be "
+                                                         "on an even or an odd state.") // holds iff the lowest bit of the source timestep is clear
+      kernels_.localPartialCoalescence(srcField, maskField, srcInterval, dstField, dstInterval, dir, timestep, stream);
+   }
+
    void unpackRedistribute(PdfField_T* dstField, CellInterval ci, unsigned char* inBuffer,
                            stencil::Direction dir, gpuStream_t stream = nullptr) const
    {
       uint8_t timestep = dstField->getTimestep();
+      WALBERLA_ASSERT(!((dstField->getTimestep() & 1) ^ 1), "When the coarse to fine step is executed, the fine Field must "
+                                                            "be on an odd timestep, while the source field could either be "
+                                                            "on an even or an odd state.") // holds iff the lowest bit of the destination timestep is set
       kernels_.unpackRedistribute(dstField, ci, inBuffer, dir, timestep, stream);
    }
 
@@ -203,6 +263,9 @@ class NonuniformGPUPackingKernelsWrapper< PdfField_T, true >
                                unsigned char* outBuffer, Direction dir, gpuStream_t stream = nullptr) const
    {
       uint8_t timestep = srcField->getTimestep();
+      WALBERLA_ASSERT((srcField->getTimestep() & 1) ^ 1, "When the fine to coarse step is executed, the fine Field must "
+                                                         "be on an even timestep, while the source field could either be "
+                                                         "on an even or an odd state.")
       kernels_.packPartialCoalescence(srcField, maskField, ci, outBuffer, dir, timestep, stream);
    }
 
@@ -226,6 +289,8 @@ class NonuniformGPUPackingKernelsWrapper< PdfField_T, true >
       return kernels_.partialCoalescenceSize(ci, dir);
    }
 
+   bool blockWise() const {return kernels_.blockWise;}
+
  private:
    PackingKernels_T kernels_;
 };
@@ -243,17 +308,52 @@ class NonuniformGeneratedGPUPdfPackInfo : public walberla::gpu::GeneratedNonUnif
    using Stencil                       = typename LatticeStorageSpecification_T::Stencil;
    using CommunicationStencil          = typename LatticeStorageSpecification_T::CommunicationStencil;
    using CommData_T                    = NonuniformGPUCommData< LatticeStorageSpecification_T >;
+   using value_type                    = typename PdfField_T::value_type;
+
+   NonuniformGeneratedGPUPdfPackInfo(const uint64_t meshLevels, const BlockDataID pdfFieldID, const BlockDataID commDataID)
+      : pdfFieldID_(pdfFieldID), commDataID_(commDataID) { init(meshLevels); } // init() sizes the equal-level lookup tables
+
+   void init(const uint64_t meshLevels){ // resizes the four equal-level lookup tables (host and GPU, source and destination)
+      auto size = meshLevels * Stencil::Q; // slots are later addressed as level * Stencil::Q + direction
+      equalCommSRC.resize(size);
+      equalCommDST.resize(size);
+      equalCommSRCGPU.resize(size);
+      equalCommDSTGPU.resize(size);
 
-   NonuniformGeneratedGPUPdfPackInfo(const BlockDataID pdfFieldID, const BlockDataID commDataID)
-      : pdfFieldID_(pdfFieldID), commDataID_(commDataID){};
+   }
+
+   void sync() override { // copies every host-side pointer list into a freshly allocated device array; may be called repeatedly
+      for (uint_t i = 0; i < equalCommSRC.size(); i++){
+         for (auto const& x : equalCommSRC[i]){
+            auto const& key = x.first; // region size shared by all pointers in x.second; bound by reference to avoid a copy
+            WALBERLA_GPU_CHECK(gpuFree(equalCommSRCGPU[i][key])) // release the array of an earlier sync(); a new map entry is nullptr, which gpuFree is expected to accept
+            WALBERLA_GPU_CHECK(gpuFree(equalCommDSTGPU[i][key]))
+            WALBERLA_GPU_CHECK(gpuMalloc((void**) &equalCommSRCGPU[i][key], sizeof(value_type*) * x.second.size()));
+            WALBERLA_GPU_CHECK(gpuMemcpy(equalCommSRCGPU[i][key], x.second.data(), sizeof(value_type*) * x.second.size(), gpuMemcpyHostToDevice));
+            WALBERLA_GPU_CHECK(gpuMalloc((void**) &equalCommDSTGPU[i][key], sizeof(value_type*) * equalCommDST[i][key].size()));
+            WALBERLA_GPU_CHECK(gpuMemcpy(equalCommDSTGPU[i][key], equalCommDST[i][key].data(), sizeof(value_type*) * equalCommDST[i][key].size(), gpuMemcpyHostToDevice));
+         }
+      }
+   }
+
+   ~NonuniformGeneratedGPUPdfPackInfo() { // frees the device pointer arrays that sync() allocated
+      for (uint_t slot = 0; slot != equalCommSRC.size(); ++slot){
+         for (auto const& group : equalCommSRC[slot]){
+            auto const& extent = group.first; // the host map's keys identify the entries of both GPU maps
+            WALBERLA_GPU_CHECK(gpuFree(equalCommSRCGPU[slot][extent]))
+            WALBERLA_GPU_CHECK(gpuFree(equalCommDSTGPU[slot][extent]))
+         }
+      }
+   }
 
    bool constantDataExchange() const override { return true; };
    bool threadsafeReceiving() const override { return false; };
 
    /// Equal Level
    void unpackDataEqualLevel(Block* receiver, Direction dir, GpuBuffer_T& buffer, gpuStream_t stream) override;
-   void communicateLocalEqualLevel(const Block* sender, Block* receiver, stencil::Direction dir,
-                                   gpuStream_t stream) override;
+   void addForLocalEqualLevelComm(const Block* sender, Block* receiver, stencil::Direction dir) override;
+   void communicateLocalEqualLevel(uint64_t level, uint8_t timestep, gpuStream_t stream) override;
+   void communicateLocalEqualLevel(const Block* sender, Block* receiver, stencil::Direction dir, gpuStream_t stream) override;
 
    /// Coarse to Fine
    void unpackDataCoarseToFine(Block* fineReceiver, const BlockID& coarseSender, stencil::Direction dir,
@@ -291,7 +391,7 @@ class NonuniformGeneratedGPUPdfPackInfo : public walberla::gpu::GeneratedNonUnif
    bool areNeighborsInDirection(const Block* block, const BlockID& neighborID,
                                 Vector3< cell_idx_t > dirVec) const;
 
-   CellInterval intervalHullInDirection(const CellInterval& ci, Vector3< cell_idx_t > dirVec,
+   CellInterval intervalHullInDirection(const CellInterval& ci, Vector3< cell_idx_t > tangentialDir,
                                         cell_idx_t width) const;
    bool skipsThroughCoarseBlock(const Block* block, Direction dir) const;
 
@@ -306,6 +406,14 @@ class NonuniformGeneratedGPUPdfPackInfo : public walberla::gpu::GeneratedNonUnif
    const BlockDataID pdfFieldID_;
    internal::NonuniformGPUPackingKernelsWrapper< PdfField_T, LatticeStorageSpecification_T::inplace > kernels_;
 
+   std::array<int64_t, 4> strides;
+
+   std::vector<std::unordered_map<Vector3<int64_t>,std::vector<value_type*>>> equalCommSRC;
+   std::vector<std::unordered_map<Vector3<int64_t>,std::vector<value_type*>>> equalCommDST;
+
+   std::vector<std::unordered_map<Vector3<int64_t>,value_type **>> equalCommSRCGPU;
+   std::vector<std::unordered_map<Vector3<int64_t>,value_type **>> equalCommDSTGPU;
+
  public:
    const BlockDataID commDataID_;
 };
diff --git a/src/lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.impl.h b/src/lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.impl.h
index 7ff9c7fd3cf5383499f51d9b17bc995f45450ef2..3f0b0ad585cf9ab503f8fcd3c6b0be54e656d211 100644
--- a/src/lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.impl.h
+++ b/src/lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.impl.h
@@ -55,7 +55,7 @@ std::shared_ptr< NonuniformGeneratedGPUPdfPackInfo< PdfField_T > >
    auto handling = std::make_shared<NonuniformGPUCommDataHandling< LatticeStorageSpecification_T > >(blocks);
    BlockDataID commDataID = sbf->addBlockData(handling, dataIdentifier);
 
-   return std::make_shared<NonuniformGeneratedGPUPdfPackInfo< PdfField_T > >(pdfFieldID, commDataID);
+   return std::make_shared<NonuniformGeneratedGPUPdfPackInfo< PdfField_T > >(sbf->getNumberOfLevels(), pdfFieldID, commDataID);
 }
 
 
@@ -81,6 +81,9 @@ template< typename PdfField_T>
 void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::communicateLocalEqualLevel(
    const Block* sender, Block* receiver, stencil::Direction dir, gpuStream_t stream)
 {
+   if(kernels_.blockWise())
+      return;
+
    auto srcField = const_cast< Block* >(sender)->getData< PdfField_T >(pdfFieldID_);
    auto dstField = receiver->getData< PdfField_T >(pdfFieldID_);
 
@@ -92,6 +95,57 @@ void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::communicateLocalEqualLevel
    kernels_.localCopyDirection(srcField, srcRegion, dstField, dstRegion, dir, stream);
 }
 
+template< typename PdfField_T>
+void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::addForLocalEqualLevelComm(
+   const Block* sender, Block* receiver, stencil::Direction dir)
+{
+   // Records one sender/receiver pair for the batched equal-level copy; does nothing unless the kernels are block-wise.
+   if(!kernels_.blockWise()) { return; }
+
+   auto srcField = const_cast< Block* >(sender)->getData< PdfField_T >(pdfFieldID_);
+   auto dstField = receiver->getData< PdfField_T >(pdfFieldID_);
+
+   // Sender slice and receiver ghost region; two layers are used when skipsThroughCoarseBlock() reports true.
+   cell_idx_t ghostLayers = skipsThroughCoarseBlock(sender, dir) ? 2 : 1;
+   CellInterval srcSlice;
+   CellInterval dstGhost;
+   srcField->getSliceBeforeGhostLayer(dir, srcSlice, ghostLayers, false);
+   dstField->getGhostRegion(stencil::inverseDir[dir], dstGhost, ghostLayers, false);
+
+   // A single stride set is stored; each call overwrites it with the strides of the current source field.
+   strides[0] = int64_t(srcField->xStride());
+   strides[1] = int64_t(srcField->yStride());
+   strides[2] = int64_t(srcField->zStride());
+   strides[3] = int64_t(1 * int64_t(srcField->fStride()));
+
+   // NOTE(review): the slot index assumes every direction value is smaller than Stencil::Q - confirm for all stencils.
+   const uint_t slot = sender->getLevel() * Stencil::Q + dir;
+   const Vector3<int64_t> extent(int64_c(srcSlice.xSize()), int64_c(srcSlice.ySize()), int64_c(srcSlice.zSize()));
+
+   equalCommDST[slot][extent].emplace_back(dstField->dataAt(dstGhost.xMin(), dstGhost.yMin(), dstGhost.zMin(), 0));
+   equalCommSRC[slot][extent].emplace_back(srcField->dataAt(srcSlice.xMin(), srcSlice.yMin(), srcSlice.zMin(), 0));
+}
+
+
+template< typename PdfField_T>
+void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::communicateLocalEqualLevel(uint64_t level, uint8_t timestep, gpuStream_t stream)
+{
+   // Issues one blockLocalCopyDirection call per (direction, region size) group registered for this level.
+   if(!kernels_.blockWise()) { return; }
+
+   for (auto dirIt = CommunicationStencil::beginNoCenter(); dirIt != CommunicationStencil::end(); ++dirIt){
+      const uint_t slot = level * Stencil::Q + *dirIt;
+      for (auto const& group : equalCommSRC[slot]){
+         auto const& extent = group.first;
+         value_type** srcDevicePtrs = equalCommSRCGPU[slot][extent];
+         value_type** dstDevicePtrs = equalCommDSTGPU[slot][extent];
+         std::array< int64_t, 4 > size = { int64_c(group.second.size()), extent[0], extent[1], extent[2] }; // pair count, then region extent
+
+         kernels_.blockLocalCopyDirection(srcDevicePtrs, dstDevicePtrs, *dirIt, timestep, stream, size, strides);
+      }
+   }
+}
+
 
 template< typename PdfField_T>
 void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::packDataEqualLevelImpl(
@@ -168,21 +222,13 @@ void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::communicateLocalCoarseToFi
       Direction const unpackDir      = dstIntervals[index].first;
       CellInterval dstInterval = dstIntervals[index].second;
 
-      uint_t packSize      = kernels_.size(srcInterval);
-
 #ifndef NDEBUG
       Direction const packDir        = srcIntervals[index].first;
       WALBERLA_ASSERT_EQUAL(packDir, stencil::inverseDir[unpackDir])
       uint_t unpackSize = kernels_.redistributeSize(dstInterval);
-      WALBERLA_ASSERT_EQUAL(packSize, unpackSize)
+      WALBERLA_ASSERT_EQUAL(kernels_.size(srcInterval), unpackSize)
 #endif
-
-      // TODO: This is a dirty workaround. Code-generate direct redistribution!
-      unsigned char *buffer;
-      WALBERLA_GPU_CHECK( gpuMalloc( &buffer, packSize))
-      kernels_.packAll(srcField, srcInterval, buffer, stream);
-      kernels_.unpackRedistribute(dstField, dstInterval, buffer, unpackDir, stream);
-      WALBERLA_GPU_CHECK(gpuFree(buffer))
+      kernels_.localCopyRedistribute(srcField, srcInterval, dstField, dstInterval, unpackDir, stream);
    }
 }
 
@@ -190,6 +236,9 @@ template< typename PdfField_T>
 void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::communicateLocalCoarseToFine(
    const Block* coarseSender, Block* fineReceiver, stencil::Direction dir, GpuBuffer_T & buffer, gpuStream_t stream)
 {
+   // WARNING: This function uses an inplace buffer array.
+   // If possible the direct communicateLocalCoarseToFine without buffer array should be used
+
    auto srcField = const_cast< Block* >(coarseSender)->getData< PdfField_T >(pdfFieldID_);
    auto dstField = fineReceiver->getData< PdfField_T >(pdfFieldID_);
 
@@ -269,22 +318,16 @@ void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::communicateLocalFineToCoar
 
    CellInterval srcInterval;
    srcField->getGhostRegion(dir, srcInterval, 2);
-   uint_t packSize = kernels_.partialCoalescenceSize(srcInterval, dir);
 
    CellInterval dstInterval = getCoarseBlockCoalescenceInterval(coarseReceiver, fineSender->getId(),
                                                                 invDir, dstField);
 
 #ifndef NDEBUG
    uint_t unpackSize = kernels_.size(dstInterval, invDir);
-   WALBERLA_ASSERT_EQUAL(packSize, unpackSize)
+   WALBERLA_ASSERT_EQUAL(kernels_.partialCoalescenceSize(srcInterval, dir), unpackSize)
 #endif
 
-   // TODO: This is a dirty workaround. Code-generate direct redistribution!
-   unsigned char *buffer;
-   WALBERLA_GPU_CHECK( gpuMalloc( &buffer, packSize))
-   kernels_.packPartialCoalescence(srcField, maskField, srcInterval, buffer, dir, stream);
-   kernels_.unpackCoalescence(dstField, dstInterval, buffer, invDir, stream);
-   WALBERLA_GPU_CHECK(gpuFree(buffer))
+   kernels_.localPartialCoalescence(srcField, maskField, srcInterval, dstField, dstInterval, dir, stream);
 }
 
 
@@ -425,7 +468,7 @@ inline Vector3< cell_idx_t >
 }
 
 /**
- * Returns the part of a cell interval's hull of given \p width in direction \p dirVec.
+ * Returns the part of a cell interval's hull of given width in direction dirVec.
  * @param ci        The original cell interval
  * @param dirVec    Direction Vector
  * @param width     Width of the hull
diff --git a/src/lbm_generated/refinement/RefinementScaling.h b/src/lbm_generated/refinement/RefinementScaling.h
index f8015946a4816e4c0e7c54ea43d2f310755aaec3..abee51e715e578c22490edbe7b3e743156058a66 100644
--- a/src/lbm_generated/refinement/RefinementScaling.h
+++ b/src/lbm_generated/refinement/RefinementScaling.h
@@ -20,44 +20,18 @@
 
 #pragma once
 
-#include "blockforest/BlockDataHandling.h"
+#include "core/DataTypes.h"
 
-#include "domain_decomposition/IBlock.h"
-#include "domain_decomposition/StructuredBlockStorage.h"
-
-namespace walberla
-{
-namespace lbm_generated
+namespace walberla::lbm_generated
 {
 
-class DefaultRefinementScaling : public blockforest::AlwaysInitializeBlockDataHandling< real_t >
+inline real_t relaxationRateScaling( real_t relaxationRate, uint_t refinementLevel ) // returns relaxationRate rescaled for refinementLevel
 {
- public:
-   DefaultRefinementScaling(const weak_ptr< StructuredBlockStorage >& blocks, const real_t parameter)
-      : blocks_(blocks), parameter_(parameter){};
-
-   real_t* initialize(IBlock* const block) override
-   {
-      WALBERLA_ASSERT_NOT_NULLPTR(block)
-      auto blocks = blocks_.lock();
-      WALBERLA_CHECK_NOT_NULLPTR(blocks)
-
-      level_ = block->getBlockStorage().getLevel(*block);
-
-      const real_t level_scale_factor = real_c(uint_t(1) << level_);
-      const real_t one                = real_c(1.0);
-      const real_t half               = real_c(0.5);
-
-      return new real_t(parameter_ / (level_scale_factor * (-parameter_ * half + one) + parameter_ * half));
-   }
-   bool operator==(const DefaultRefinementScaling& other) const { return level_ == other.level_; }
-
- private:
-   const weak_ptr< StructuredBlockStorage > blocks_;
-   const real_t parameter_;
+   const real_t levelScaleFactor = real_c(uint_c(1) << refinementLevel); // 2^refinementLevel
+   const real_t one                = real_c(1.0);
+   const real_t half               = real_c(0.5);
 
-   uint_t level_;
-};
+   return real_c(relaxationRate / (levelScaleFactor * (-relaxationRate * half + one) + relaxationRate * half)); // rate / (2^L * (1 - rate/2) + rate/2)
+}
 
-} // namespace lbm_generated
-} // namespace walberla
\ No newline at end of file
+} // namespace walberla::lbm_generated
\ No newline at end of file
diff --git a/src/lbm_generated/storage_specification/D3Q19StorageSpecification.cpp b/src/lbm_generated/storage_specification/D3Q19StorageSpecification.cpp
index f36797eecca7282cf1f615492ac54cee38be871f..8a080f8633b6e7b21fdf5f4369290f7ac0d289aa 100644
--- a/src/lbm_generated/storage_specification/D3Q19StorageSpecification.cpp
+++ b/src/lbm_generated/storage_specification/D3Q19StorageSpecification.cpp
@@ -27,257 +27,108 @@
 #   pragma GCC diagnostic ignored "-Wunused-variable"
 #endif
 
-/*************************************************************************************
+namespace walberla {
+namespace lbm {
+
+   /*************************************************************************************
  *                                Kernel Definitions
 *************************************************************************************/
-namespace internal_d3q19storagespecification_pack_ALL {
+   namespace internal_d3q19storagespecification_pack_ALL {
 static FUNC_PREFIX void d3q19storagespecification_pack_ALL(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_30 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0;
-      double * RESTRICT _data_pdfs_src_00_31 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_32 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_33 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 3*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_34 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 4*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_35 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 5*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_36 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 6*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_30_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_30;
-         double * RESTRICT _data_pdfs_src_00_31_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_31;
-         double * RESTRICT _data_pdfs_src_00_32_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_32;
-         double * RESTRICT _data_pdfs_src_00_33_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_33;
-         double * RESTRICT _data_pdfs_src_00_34_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_34;
-         double * RESTRICT _data_pdfs_src_00_35_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_35;
-         double * RESTRICT _data_pdfs_src_00_36_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_36;
-         double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
-         double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
-         double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
-         double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
-         double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
-         double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
-         double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
-         double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
-         double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
-         double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
-         double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
-         double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2] = _data_pdfs_src_00_30_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 1] = _data_pdfs_src_00_31_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 2] = _data_pdfs_src_00_32_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 3] = _data_pdfs_src_00_33_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 4] = _data_pdfs_src_00_34_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 5] = _data_pdfs_src_00_35_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 6] = _data_pdfs_src_00_36_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 7] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 8] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 9] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 10] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 11] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 12] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 13] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 14] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 15] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 16] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 17] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 18] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2];
+            _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_3];
+            _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_3];
+            _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 3*_stride_pdfs_src_3];
+            _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 4*_stride_pdfs_src_3];
+            _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 5] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 5*_stride_pdfs_src_3];
+            _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 6] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 6*_stride_pdfs_src_3];
+            _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 7] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3];
+            _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 8] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3];
+            _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 9] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3];
+            _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 10] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3];
+            _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 11] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3];
+            _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 12] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3];
+            _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 13] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3];
+            _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 14] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3];
+            _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 15] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3];
+            _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 16] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3];
+            _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 17] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3];
+            _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 18] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_unpack_ALL {
-static FUNC_PREFIX void d3q19storagespecification_unpack_ALL(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+   namespace internal_d3q19storagespecification_unpack_ALL {
+static FUNC_PREFIX void d3q19storagespecification_unpack_ALL(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_30 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0;
-      double * RESTRICT  _data_pdfs_dst_00_31 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_32 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 2*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_33 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 3*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_34 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 4*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_35 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 5*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_36 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 6*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_30_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_30;
-         double * RESTRICT  _data_pdfs_dst_00_31_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_31;
-         double * RESTRICT  _data_pdfs_dst_00_32_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_32;
-         double * RESTRICT  _data_pdfs_dst_00_33_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_33;
-         double * RESTRICT  _data_pdfs_dst_00_34_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_34;
-         double * RESTRICT  _data_pdfs_dst_00_35_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_35;
-         double * RESTRICT  _data_pdfs_dst_00_36_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_36;
-         double * RESTRICT  _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
-         double * RESTRICT  _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
-         double * RESTRICT  _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
-         double * RESTRICT  _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
-         double * RESTRICT  _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
-         double * RESTRICT  _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
-         double * RESTRICT  _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
-         double * RESTRICT  _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
-         double * RESTRICT  _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
-         double * RESTRICT  _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
-         double * RESTRICT  _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
-         double * RESTRICT  _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_30_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2];
-            _data_pdfs_dst_00_31_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 1];
-            _data_pdfs_dst_00_32_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 2];
-            _data_pdfs_dst_00_33_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 3];
-            _data_pdfs_dst_00_34_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 4];
-            _data_pdfs_dst_00_35_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 5];
-            _data_pdfs_dst_00_36_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 6];
-            _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 7];
-            _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 8];
-            _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 9];
-            _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 10];
-            _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 11];
-            _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 12];
-            _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 13];
-            _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 14];
-            _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 15];
-            _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 16];
-            _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 17];
-            _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 18];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 1];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 4];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 5];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 6];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 7];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 8];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 9];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 10];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 11];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 12];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 13];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 14];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 15];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 16];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 17];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 18];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_localCopy_ALL {
+   namespace internal_d3q19storagespecification_localCopy_ALL {
 static FUNC_PREFIX void d3q19storagespecification_localCopy_ALL(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_30 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0;
-      double * RESTRICT _data_pdfs_src_00_30 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0;
-      double * RESTRICT  _data_pdfs_dst_00_31 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_31 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_32 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 2*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_32 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_33 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 3*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_33 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 3*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_34 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 4*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_34 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 4*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_35 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 5*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_35 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 5*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_36 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 6*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_36 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 6*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_30_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_30;
-         double * RESTRICT _data_pdfs_src_00_30_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_30;
-         double * RESTRICT  _data_pdfs_dst_00_31_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_31;
-         double * RESTRICT _data_pdfs_src_00_31_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_31;
-         double * RESTRICT  _data_pdfs_dst_00_32_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_32;
-         double * RESTRICT _data_pdfs_src_00_32_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_32;
-         double * RESTRICT  _data_pdfs_dst_00_33_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_33;
-         double * RESTRICT _data_pdfs_src_00_33_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_33;
-         double * RESTRICT  _data_pdfs_dst_00_34_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_34;
-         double * RESTRICT _data_pdfs_src_00_34_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_34;
-         double * RESTRICT  _data_pdfs_dst_00_35_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_35;
-         double * RESTRICT _data_pdfs_src_00_35_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_35;
-         double * RESTRICT  _data_pdfs_dst_00_36_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_36;
-         double * RESTRICT _data_pdfs_src_00_36_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_36;
-         double * RESTRICT  _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
-         double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
-         double * RESTRICT  _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
-         double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
-         double * RESTRICT  _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
-         double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
-         double * RESTRICT  _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
-         double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
-         double * RESTRICT  _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
-         double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
-         double * RESTRICT  _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
-         double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
-         double * RESTRICT  _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
-         double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
-         double * RESTRICT  _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
-         double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
-         double * RESTRICT  _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
-         double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
-         double * RESTRICT  _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
-         double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
-         double * RESTRICT  _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
-         double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
-         double * RESTRICT  _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
-         double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_30_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_30_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_31_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_31_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_32_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_32_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_33_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_33_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_34_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_34_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_35_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_35_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_36_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_36_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 3*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 4*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 5*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 6*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3];
          }
       }
    }
@@ -285,36 +136,36 @@ static FUNC_PREFIX void d3q19storagespecification_localCopy_ALL(double * RESTRIC
 }
 
 
-namespace internal_d3q19storagespecification_pack_TE {
-static FUNC_PREFIX void d3q19storagespecification_pack_TE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+   namespace internal_d3q19storagespecification_pack_B {
+static FUNC_PREFIX void d3q19storagespecification_pack_B(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 6*_stride_pdfs_src_3];
+            _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3];
+            _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3];
+            _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3];
+            _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_pack_SW {
-static FUNC_PREFIX void d3q19storagespecification_pack_SW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_pack_BW {
+static FUNC_PREFIX void d3q19storagespecification_pack_BW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3];
          }
       }
    }
@@ -324,81 +175,65 @@ static FUNC_PREFIX void d3q19storagespecification_pack_SW(double * RESTRICT  _da
 namespace internal_d3q19storagespecification_pack_T {
 static FUNC_PREFIX void d3q19storagespecification_pack_T(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_35 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 5*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_35_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_35;
-         double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
-         double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
-         double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
-         double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2] = _data_pdfs_src_00_35_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 1] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 3] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 4] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 5*_stride_pdfs_src_3];
+            _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3];
+            _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3];
+            _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3];
+            _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_pack_BS {
-static FUNC_PREFIX void d3q19storagespecification_pack_BS(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_pack_NW {
+static FUNC_PREFIX void d3q19storagespecification_pack_NW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_pack_TN {
-static FUNC_PREFIX void d3q19storagespecification_pack_TN(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_pack_SW {
+static FUNC_PREFIX void d3q19storagespecification_pack_SW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_pack_BW {
-static FUNC_PREFIX void d3q19storagespecification_pack_BW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_pack_BS {
+static FUNC_PREFIX void d3q19storagespecification_pack_BS(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3];
          }
       }
    }
@@ -408,809 +243,617 @@ static FUNC_PREFIX void d3q19storagespecification_pack_BW(double * RESTRICT  _da
 namespace internal_d3q19storagespecification_pack_N {
 static FUNC_PREFIX void d3q19storagespecification_pack_N(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_31 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_31_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_31;
-         double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
-         double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
-         double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
-         double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2] = _data_pdfs_src_00_31_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 1] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 3] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 4] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_3];
+            _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3];
+            _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3];
+            _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3];
+            _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_pack_E {
-static FUNC_PREFIX void d3q19storagespecification_pack_E(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_pack_BE {
+static FUNC_PREFIX void d3q19storagespecification_pack_BE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_34 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 4*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_34_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_34;
-         double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
-         double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
-         double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
-         double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2] = _data_pdfs_src_00_34_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 1] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 3] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 4] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_pack_NW {
-static FUNC_PREFIX void d3q19storagespecification_pack_NW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_pack_W {
+static FUNC_PREFIX void d3q19storagespecification_pack_W(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 3*_stride_pdfs_src_3];
+            _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3];
+            _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3];
+            _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3];
+            _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_pack_NE {
-static FUNC_PREFIX void d3q19storagespecification_pack_NE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_pack_TS {
+static FUNC_PREFIX void d3q19storagespecification_pack_TS(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_pack_TW {
-static FUNC_PREFIX void d3q19storagespecification_pack_TW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_pack_E {
+static FUNC_PREFIX void d3q19storagespecification_pack_E(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 4*_stride_pdfs_src_3];
+            _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3];
+            _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3];
+            _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3];
+            _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_pack_BE {
-static FUNC_PREFIX void d3q19storagespecification_pack_BE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_pack_SE {
+static FUNC_PREFIX void d3q19storagespecification_pack_SE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_pack_W {
-static FUNC_PREFIX void d3q19storagespecification_pack_W(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_pack_BN {
+static FUNC_PREFIX void d3q19storagespecification_pack_BN(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_33 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 3*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_33_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_33;
-         double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
-         double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
-         double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
-         double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2] = _data_pdfs_src_00_33_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 1] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 3] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 4] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_pack_S {
-static FUNC_PREFIX void d3q19storagespecification_pack_S(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_pack_TN {
+static FUNC_PREFIX void d3q19storagespecification_pack_TN(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_32 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_32_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_32;
-         double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
-         double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
-         double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
-         double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2] = _data_pdfs_src_00_32_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 1] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 3] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 4] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_pack_SE {
-static FUNC_PREFIX void d3q19storagespecification_pack_SE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_pack_NE {
+static FUNC_PREFIX void d3q19storagespecification_pack_NE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_pack_TS {
-static FUNC_PREFIX void d3q19storagespecification_pack_TS(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_pack_TW {
+static FUNC_PREFIX void d3q19storagespecification_pack_TW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_pack_BN {
-static FUNC_PREFIX void d3q19storagespecification_pack_BN(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_pack_TE {
+static FUNC_PREFIX void d3q19storagespecification_pack_TE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_pack_B {
-static FUNC_PREFIX void d3q19storagespecification_pack_B(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_pack_S {
+static FUNC_PREFIX void d3q19storagespecification_pack_S(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_36 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 6*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_36_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_36;
-         double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
-         double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
-         double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
-         double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2] = _data_pdfs_src_00_36_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 1] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 3] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 4] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_3];
+            _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3];
+            _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3];
+            _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3];
+            _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_unpack_BW {
-static FUNC_PREFIX void d3q19storagespecification_unpack_BW(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+   namespace internal_d3q19storagespecification_unpack_N {
+static FUNC_PREFIX void d3q19storagespecification_unpack_N(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 1];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 4];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_unpack_N {
-static FUNC_PREFIX void d3q19storagespecification_unpack_N(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_TW {
+static FUNC_PREFIX void d3q19storagespecification_unpack_TW(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_32 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 2*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_32_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_32;
-         double * RESTRICT  _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
-         double * RESTRICT  _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
-         double * RESTRICT  _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
-         double * RESTRICT  _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_32_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2];
-            _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 1];
-            _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 2];
-            _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 3];
-            _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 4];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_unpack_SE {
-static FUNC_PREFIX void d3q19storagespecification_unpack_SE(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_NW {
+static FUNC_PREFIX void d3q19storagespecification_unpack_NW(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_unpack_TE {
-static FUNC_PREFIX void d3q19storagespecification_unpack_TE(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_S {
+static FUNC_PREFIX void d3q19storagespecification_unpack_S(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 1];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 4];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_unpack_T {
-static FUNC_PREFIX void d3q19storagespecification_unpack_T(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_BS {
+static FUNC_PREFIX void d3q19storagespecification_unpack_BS(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_36 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 6*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_36_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_36;
-         double * RESTRICT  _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
-         double * RESTRICT  _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
-         double * RESTRICT  _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
-         double * RESTRICT  _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_36_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2];
-            _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 1];
-            _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 2];
-            _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 3];
-            _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 4];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_unpack_TS {
-static FUNC_PREFIX void d3q19storagespecification_unpack_TS(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_NE {
+static FUNC_PREFIX void d3q19storagespecification_unpack_NE(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_unpack_BE {
-static FUNC_PREFIX void d3q19storagespecification_unpack_BE(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_BW {
+static FUNC_PREFIX void d3q19storagespecification_unpack_BW(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_unpack_NW {
-static FUNC_PREFIX void d3q19storagespecification_unpack_NW(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_BN {
+static FUNC_PREFIX void d3q19storagespecification_unpack_BN(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_unpack_NE {
-static FUNC_PREFIX void d3q19storagespecification_unpack_NE(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_TN {
+static FUNC_PREFIX void d3q19storagespecification_unpack_TN(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_unpack_BS {
-static FUNC_PREFIX void d3q19storagespecification_unpack_BS(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_W {
+static FUNC_PREFIX void d3q19storagespecification_unpack_W(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 1];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 4];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_unpack_E {
-static FUNC_PREFIX void d3q19storagespecification_unpack_E(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_SW {
+static FUNC_PREFIX void d3q19storagespecification_unpack_SW(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_33 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 3*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_33_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_33;
-         double * RESTRICT  _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
-         double * RESTRICT  _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
-         double * RESTRICT  _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
-         double * RESTRICT  _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_33_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2];
-            _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 1];
-            _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 2];
-            _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 3];
-            _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 4];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_unpack_S {
-static FUNC_PREFIX void d3q19storagespecification_unpack_S(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_SE {
+static FUNC_PREFIX void d3q19storagespecification_unpack_SE(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_31 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_31_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_31;
-         double * RESTRICT  _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
-         double * RESTRICT  _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
-         double * RESTRICT  _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
-         double * RESTRICT  _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_31_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2];
-            _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 1];
-            _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 2];
-            _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 3];
-            _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 4];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_unpack_W {
-static FUNC_PREFIX void d3q19storagespecification_unpack_W(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_E {
+static FUNC_PREFIX void d3q19storagespecification_unpack_E(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_34 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 4*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_34_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_34;
-         double * RESTRICT  _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
-         double * RESTRICT  _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
-         double * RESTRICT  _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
-         double * RESTRICT  _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_34_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2];
-            _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 1];
-            _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 2];
-            _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 3];
-            _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 4];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 1];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 4];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_unpack_SW {
-static FUNC_PREFIX void d3q19storagespecification_unpack_SW(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_TS {
+static FUNC_PREFIX void d3q19storagespecification_unpack_TS(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_unpack_B {
-static FUNC_PREFIX void d3q19storagespecification_unpack_B(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_BE {
+static FUNC_PREFIX void d3q19storagespecification_unpack_BE(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_35 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 5*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_35_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_35;
-         double * RESTRICT  _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
-         double * RESTRICT  _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
-         double * RESTRICT  _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
-         double * RESTRICT  _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_35_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2];
-            _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 1];
-            _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 2];
-            _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 3];
-            _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 4];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_unpack_TN {
-static FUNC_PREFIX void d3q19storagespecification_unpack_TN(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_B {
+static FUNC_PREFIX void d3q19storagespecification_unpack_B(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 1];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 4];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_unpack_BN {
-static FUNC_PREFIX void d3q19storagespecification_unpack_BN(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_TE {
+static FUNC_PREFIX void d3q19storagespecification_unpack_TE(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_unpack_TW {
-static FUNC_PREFIX void d3q19storagespecification_unpack_TW(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_T {
+static FUNC_PREFIX void d3q19storagespecification_unpack_T(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 1];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 4];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_localCopy_NE {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_NE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+   namespace internal_d3q19storagespecification_localCopy_BE {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_BE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
-         double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_localCopy_TS {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_TS(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_localCopy_NE {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_NE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
-         double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_localCopy_BE {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_BE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_localCopy_TE {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_TE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
-         double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_localCopy_BS {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_BS(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_localCopy_N {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_N(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
-         double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_localCopy_BW {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_BW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_localCopy_NW {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_NW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
-         double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_localCopy_T {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_T(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_localCopy_BS {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_BS(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_35 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 5*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_35 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 5*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_35_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_35;
-         double * RESTRICT _data_pdfs_src_00_35_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_35;
-         double * RESTRICT  _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
-         double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
-         double * RESTRICT  _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
-         double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
-         double * RESTRICT  _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
-         double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
-         double * RESTRICT  _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
-         double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_35_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_35_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3];
          }
       }
    }
@@ -1220,337 +863,209 @@ static FUNC_PREFIX void d3q19storagespecification_localCopy_T(double * RESTRICT
 namespace internal_d3q19storagespecification_localCopy_TN {
 static FUNC_PREFIX void d3q19storagespecification_localCopy_TN(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
-         double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_localCopy_W {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_W(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_localCopy_SE {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_SE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_33 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 3*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_33 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 3*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_33_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_33;
-         double * RESTRICT _data_pdfs_src_00_33_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_33;
-         double * RESTRICT  _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
-         double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
-         double * RESTRICT  _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
-         double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
-         double * RESTRICT  _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
-         double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
-         double * RESTRICT  _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
-         double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_33_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_33_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_localCopy_E {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_E(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_localCopy_B {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_B(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_34 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 4*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_34 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 4*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_34_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_34;
-         double * RESTRICT _data_pdfs_src_00_34_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_34;
-         double * RESTRICT  _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
-         double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
-         double * RESTRICT  _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
-         double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
-         double * RESTRICT  _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
-         double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
-         double * RESTRICT  _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
-         double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_34_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_34_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 6*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_localCopy_TW {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_TW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_localCopy_W {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_W(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
-         double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 3*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_localCopy_SW {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_SW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_localCopy_TS {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_TS(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
-         double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_localCopy_NW {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_NW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_localCopy_TW {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_TW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
-         double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_localCopy_BN {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_BN(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_localCopy_BW {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_BW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
-         double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_localCopy_TE {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_TE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_localCopy_SW {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_SW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
-         double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_localCopy_B {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_B(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_localCopy_BN {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_BN(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_36 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 6*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_36 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 6*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_36_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_36;
-         double * RESTRICT _data_pdfs_src_00_36_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_36;
-         double * RESTRICT  _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
-         double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
-         double * RESTRICT  _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
-         double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
-         double * RESTRICT  _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
-         double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
-         double * RESTRICT  _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
-         double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_36_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_36_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_localCopy_N {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_N(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_localCopy_E {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_E(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_31 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_31 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_31_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_31;
-         double * RESTRICT _data_pdfs_src_00_31_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_31;
-         double * RESTRICT  _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
-         double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
-         double * RESTRICT  _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
-         double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
-         double * RESTRICT  _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
-         double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
-         double * RESTRICT  _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
-         double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_31_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_31_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 4*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_localCopy_S {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_S(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_localCopy_T {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_T(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_32 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 2*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_32 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_32_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_32;
-         double * RESTRICT _data_pdfs_src_00_32_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_32;
-         double * RESTRICT  _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
-         double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
-         double * RESTRICT  _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
-         double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
-         double * RESTRICT  _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
-         double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
-         double * RESTRICT  _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
-         double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_32_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_32_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 5*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q19storagespecification_localCopy_SE {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_SE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_localCopy_S {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_S(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
-         double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3];
          }
       }
    }
@@ -1558,15 +1073,12 @@ static FUNC_PREFIX void d3q19storagespecification_localCopy_SE(double * RESTRICT
 }
 
 
+   
 
-
-/*************************************************************************************
+   /*************************************************************************************
  *                                 Kernel Wrappers
 *************************************************************************************/
 
-namespace walberla {
-namespace lbm {
-
    void D3Q19StorageSpecification::PackKernels::packAll(PdfField_T * pdfs_src, CellInterval & ci, unsigned char * outBuffer) const
    {
       double * buffer = reinterpret_cast<double*>(outBuffer);
diff --git a/src/lbm_generated/storage_specification/D3Q19StorageSpecification.h b/src/lbm_generated/storage_specification/D3Q19StorageSpecification.h
index 7c2fb9e85eb2388361ad3737bb2cc64bcc075aea..7777f1d5d1be068744812202e08623c077d3b538 100644
--- a/src/lbm_generated/storage_specification/D3Q19StorageSpecification.h
+++ b/src/lbm_generated/storage_specification/D3Q19StorageSpecification.h
@@ -68,9 +68,35 @@ class D3Q19StorageSpecification
    // If true the background deviation (rho_0 = 1) is subtracted for the collision step.
    static const bool zeroCenteredPDFs = true;
    // Lattice weights
-   static constexpr double w[19] = { 0.333333333333333,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778 };
+   static constexpr double w[19] = { double(0.333333333333333), double(0.0555555555555556), double(0.0555555555555556), double(0.0555555555555556), double(0.0555555555555556), double(0.0555555555555556), double(0.0555555555555556), double(0.0277777777777778), double(0.0277777777777778), double(0.0277777777777778), double(0.0277777777777778), double(0.0277777777777778), double(0.0277777777777778), double(0.0277777777777778), double(0.0277777777777778), double(0.0277777777777778), double(0.0277777777777778), double(0.0277777777777778), double(0.0277777777777778) };
    // Inverse lattice weights
-   static constexpr double wInv[19] = { 3.00000000000000,18.0000000000000,18.0000000000000,18.0000000000000,18.0000000000000,18.0000000000000,18.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000 };
+   static constexpr double wInv[19] = { double(3.00000000000000), double(18.0000000000000), double(18.0000000000000), double(18.0000000000000), double(18.0000000000000), double(18.0000000000000), double(18.0000000000000), double(36.0000000000000), double(36.0000000000000), double(36.0000000000000), double(36.0000000000000), double(36.0000000000000), double(36.0000000000000), double(36.0000000000000), double(36.0000000000000), double(36.0000000000000), double(36.0000000000000), double(36.0000000000000), double(36.0000000000000) };
+
+   struct AccessorEVEN
+   {
+      static constexpr cell_idx_t readX[19] = { 0, 0, 0, 1, -1, 0, 0, 1, -1, 1, -1, 0, 0, 1, -1, 0, 0, 1, -1 };
+      static constexpr cell_idx_t readY[19] = { 0, -1, 1, 0, 0, 0, 0, -1, -1, 1, 1, -1, 1, 0, 0, -1, 1, 0, 0 };
+      static constexpr cell_idx_t readZ[19] = { 0, 0, 0, 0, 0, -1, 1, 0, 0, 0, 0, -1, -1, -1, -1, 1, 1, 1, 1 };
+      static constexpr cell_idx_t readD[19] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 };
+
+      static constexpr cell_idx_t writeX[19] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+      static constexpr cell_idx_t writeY[19] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+      static constexpr cell_idx_t writeZ[19] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+      static constexpr cell_idx_t writeD[19] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 };
+   };
+
+   struct AccessorODD
+   {
+      static constexpr cell_idx_t readX[19] = { 0, 0, 0, 1, -1, 0, 0, 1, -1, 1, -1, 0, 0, 1, -1, 0, 0, 1, -1 };
+      static constexpr cell_idx_t readY[19] = { 0, -1, 1, 0, 0, 0, 0, -1, -1, 1, 1, -1, 1, 0, 0, -1, 1, 0, 0 };
+      static constexpr cell_idx_t readZ[19] = { 0, 0, 0, 0, 0, -1, 1, 0, 0, 0, 0, -1, -1, -1, -1, 1, 1, 1, 1 };
+      static constexpr cell_idx_t readD[19] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 };
+
+      static constexpr cell_idx_t writeX[19] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+      static constexpr cell_idx_t writeY[19] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+      static constexpr cell_idx_t writeZ[19] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+      static constexpr cell_idx_t writeD[19] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 };
+   };
 
    // Compute kernels to pack and unpack MPI buffers
    class PackKernels {
@@ -84,8 +110,8 @@ class D3Q19StorageSpecification
       static const bool inplace = false;
 
       /**
-       * Packs all pdfs from the given cell interval to the send buffer.
-       * */
+      * Packs all pdfs from the given cell interval to the send buffer.
+      * */
       void packAll(PdfField_T * pdfs_src, CellInterval & ci, unsigned char * outBuffer) const;
 
       /**
@@ -124,7 +150,7 @@ class D3Q19StorageSpecification
        * @return    The required size of the buffer, in bytes
        * */
       uint_t size (CellInterval & ci, stencil::Direction dir) const {
-         return ci.numCells() * sizes[dir] * sizeof(value_type);
+         return ci.numCells() * sizes[dir] * uint_c(sizeof(value_type));
       }
 
       /**
@@ -134,7 +160,7 @@ class D3Q19StorageSpecification
        * @return    The required size of the buffer, in bytes
        * */
       uint_t size (CellInterval & ci) const {
-         return ci.numCells() * 19 * sizeof(value_type);
+         return ci.numCells() * 19 * uint_c(sizeof(value_type));
       }
 
       
@@ -143,6 +169,8 @@ class D3Q19StorageSpecification
       const uint_t sizes[27] { 0, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
    };
 
+   using value_type = PackKernels::value_type;
+
 };
 
 }} //lbm/walberla
\ No newline at end of file
diff --git a/src/lbm_generated/storage_specification/D3Q27StorageSpecification.cpp b/src/lbm_generated/storage_specification/D3Q27StorageSpecification.cpp
index 3ecdf88928bf8292254465e0f4ec19d4a1106373..dce3446abed43af223a3c4dbfeb6cefcf4966fd4 100644
--- a/src/lbm_generated/storage_specification/D3Q27StorageSpecification.cpp
+++ b/src/lbm_generated/storage_specification/D3Q27StorageSpecification.cpp
@@ -27,345 +27,132 @@
 #   pragma GCC diagnostic ignored "-Wunused-variable"
 #endif
 
-/*************************************************************************************
+namespace walberla {
+namespace lbm {
+
+   /*************************************************************************************
  *                                Kernel Definitions
 *************************************************************************************/
-namespace internal_d3q27storagespecification_pack_ALL {
+   namespace internal_d3q27storagespecification_pack_ALL {
 static FUNC_PREFIX void d3q27storagespecification_pack_ALL(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
-   {
-      double * RESTRICT _data_pdfs_src_00_30 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0;
-      double * RESTRICT _data_pdfs_src_00_31 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_32 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_33 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 3*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_34 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 4*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_35 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 5*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_36 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 6*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+   {
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_30_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_30;
-         double * RESTRICT _data_pdfs_src_00_31_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_31;
-         double * RESTRICT _data_pdfs_src_00_32_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_32;
-         double * RESTRICT _data_pdfs_src_00_33_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_33;
-         double * RESTRICT _data_pdfs_src_00_34_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_34;
-         double * RESTRICT _data_pdfs_src_00_35_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_35;
-         double * RESTRICT _data_pdfs_src_00_36_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_36;
-         double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
-         double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
-         double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
-         double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
-         double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
-         double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
-         double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
-         double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
-         double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
-         double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
-         double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
-         double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
-         double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
-         double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
-         double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
-         double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
-         double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
-         double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
-         double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
-         double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
-         {
-            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2] = _data_pdfs_src_00_30_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 1] = _data_pdfs_src_00_31_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 2] = _data_pdfs_src_00_32_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 3] = _data_pdfs_src_00_33_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 4] = _data_pdfs_src_00_34_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 5] = _data_pdfs_src_00_35_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 6] = _data_pdfs_src_00_36_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 7] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 8] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 9] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 10] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 11] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 12] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 13] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 14] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 15] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 16] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 17] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 18] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 19] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 20] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 21] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 22] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 23] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 24] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 25] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 26] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
-         }
-      }
-   }
-}
-}
-
-namespace internal_d3q27storagespecification_unpack_ALL {
-static FUNC_PREFIX void d3q27storagespecification_unpack_ALL(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
-{
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
-   {
-      double * RESTRICT  _data_pdfs_dst_00_30 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0;
-      double * RESTRICT  _data_pdfs_dst_00_31 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_32 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 2*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_33 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 3*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_34 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 4*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_35 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 5*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_36 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 6*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+         {
+            _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2];
+            _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_3];
+            _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_3];
+            _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 3*_stride_pdfs_src_3];
+            _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 4*_stride_pdfs_src_3];
+            _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 5] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 5*_stride_pdfs_src_3];
+            _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 6] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 6*_stride_pdfs_src_3];
+            _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 7] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3];
+            _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 8] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3];
+            _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 9] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3];
+            _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 10] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3];
+            _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 11] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3];
+            _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 12] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3];
+            _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 13] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3];
+            _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 14] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3];
+            _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 15] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3];
+            _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 16] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3];
+            _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 17] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3];
+            _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 18] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3];
+            _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 19] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3];
+            _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 20] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3];
+            _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 21] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3];
+            _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 22] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3];
+            _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 23] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3];
+            _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 24] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3];
+            _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 25] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3];
+            _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 26] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3];
+         }
+      }
+   }
+}
+}
+
+   namespace internal_d3q27storagespecification_unpack_ALL {
+static FUNC_PREFIX void d3q27storagespecification_unpack_ALL(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+   {
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_30_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_30;
-         double * RESTRICT  _data_pdfs_dst_00_31_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_31;
-         double * RESTRICT  _data_pdfs_dst_00_32_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_32;
-         double * RESTRICT  _data_pdfs_dst_00_33_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_33;
-         double * RESTRICT  _data_pdfs_dst_00_34_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_34;
-         double * RESTRICT  _data_pdfs_dst_00_35_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_35;
-         double * RESTRICT  _data_pdfs_dst_00_36_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_36;
-         double * RESTRICT  _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
-         double * RESTRICT  _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
-         double * RESTRICT  _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
-         double * RESTRICT  _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
-         double * RESTRICT  _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
-         double * RESTRICT  _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
-         double * RESTRICT  _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
-         double * RESTRICT  _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
-         double * RESTRICT  _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
-         double * RESTRICT  _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
-         double * RESTRICT  _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
-         double * RESTRICT  _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
-         double * RESTRICT  _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
-         double * RESTRICT  _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
-         double * RESTRICT  _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
-         double * RESTRICT  _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
-         double * RESTRICT  _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
-         double * RESTRICT  _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
-         double * RESTRICT  _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
-         double * RESTRICT  _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
-         {
-            _data_pdfs_dst_00_30_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2];
-            _data_pdfs_dst_00_31_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 1];
-            _data_pdfs_dst_00_32_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 2];
-            _data_pdfs_dst_00_33_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 3];
-            _data_pdfs_dst_00_34_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 4];
-            _data_pdfs_dst_00_35_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 5];
-            _data_pdfs_dst_00_36_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 6];
-            _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 7];
-            _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 8];
-            _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 9];
-            _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 10];
-            _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 11];
-            _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 12];
-            _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 13];
-            _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 14];
-            _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 15];
-            _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 16];
-            _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 17];
-            _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 18];
-            _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 19];
-            _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 20];
-            _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 21];
-            _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 22];
-            _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 23];
-            _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 24];
-            _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 25];
-            _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 26];
-         }
-      }
-   }
-}
-}
-
-namespace internal_d3q27storagespecification_localCopy_ALL {
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+         {
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 1];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 4];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 5];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 6];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 7];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 8];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 9];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 10];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 11];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 12];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 13];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 14];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 15];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 16];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 17];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 18];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 19];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 20];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 21];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 22];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 23];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 24];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 25];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 26];
+         }
+      }
+   }
+}
+}
+
+   namespace internal_d3q27storagespecification_localCopy_ALL {
 static FUNC_PREFIX void d3q27storagespecification_localCopy_ALL(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
-   {
-      double * RESTRICT  _data_pdfs_dst_00_30 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0;
-      double * RESTRICT _data_pdfs_src_00_30 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0;
-      double * RESTRICT  _data_pdfs_dst_00_31 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_31 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_32 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 2*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_32 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_33 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 3*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_33 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 3*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_34 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 4*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_34 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 4*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_35 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 5*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_35 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 5*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_36 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 6*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_36 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 6*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+   {
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_30_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_30;
-         double * RESTRICT _data_pdfs_src_00_30_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_30;
-         double * RESTRICT  _data_pdfs_dst_00_31_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_31;
-         double * RESTRICT _data_pdfs_src_00_31_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_31;
-         double * RESTRICT  _data_pdfs_dst_00_32_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_32;
-         double * RESTRICT _data_pdfs_src_00_32_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_32;
-         double * RESTRICT  _data_pdfs_dst_00_33_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_33;
-         double * RESTRICT _data_pdfs_src_00_33_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_33;
-         double * RESTRICT  _data_pdfs_dst_00_34_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_34;
-         double * RESTRICT _data_pdfs_src_00_34_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_34;
-         double * RESTRICT  _data_pdfs_dst_00_35_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_35;
-         double * RESTRICT _data_pdfs_src_00_35_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_35;
-         double * RESTRICT  _data_pdfs_dst_00_36_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_36;
-         double * RESTRICT _data_pdfs_src_00_36_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_36;
-         double * RESTRICT  _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
-         double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
-         double * RESTRICT  _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
-         double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
-         double * RESTRICT  _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
-         double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
-         double * RESTRICT  _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
-         double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
-         double * RESTRICT  _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
-         double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
-         double * RESTRICT  _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
-         double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
-         double * RESTRICT  _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
-         double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
-         double * RESTRICT  _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
-         double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
-         double * RESTRICT  _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
-         double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
-         double * RESTRICT  _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
-         double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
-         double * RESTRICT  _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
-         double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
-         double * RESTRICT  _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
-         double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
-         double * RESTRICT  _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
-         double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
-         double * RESTRICT  _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
-         double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
-         double * RESTRICT  _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
-         double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
-         double * RESTRICT  _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
-         double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
-         double * RESTRICT  _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
-         double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
-         double * RESTRICT  _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
-         double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
-         double * RESTRICT  _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
-         double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
-         double * RESTRICT  _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
-         double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
-         {
-            _data_pdfs_dst_00_30_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_30_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_31_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_31_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_32_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_32_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_33_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_33_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_34_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_34_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_35_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_35_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_36_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_36_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+         {
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 3*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 4*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 5*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 6*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3];
          }
       }
    }
@@ -373,276 +160,200 @@ static FUNC_PREFIX void d3q27storagespecification_localCopy_ALL(double * RESTRIC
 }
 
 
-namespace internal_d3q27storagespecification_pack_T {
-static FUNC_PREFIX void d3q27storagespecification_pack_T(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+   namespace internal_d3q27storagespecification_pack_BE {
+static FUNC_PREFIX void d3q27storagespecification_pack_BE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
-   {
-      double * RESTRICT _data_pdfs_src_00_35 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 5*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+   {
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_35_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_35;
-         double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
-         double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
-         double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
-         double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
-         double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
-         double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
-         double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
-         double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2] = _data_pdfs_src_00_35_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 1] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 3] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 4] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 5] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 6] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 7] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 8] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_pack_BN {
-static FUNC_PREFIX void d3q27storagespecification_pack_BN(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_SE {
+static FUNC_PREFIX void d3q27storagespecification_pack_SE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
-         double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
-         double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_pack_NE {
-static FUNC_PREFIX void d3q27storagespecification_pack_NE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_BW {
+static FUNC_PREFIX void d3q27storagespecification_pack_BW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
-         double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
-         double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_pack_BNE {
-static FUNC_PREFIX void d3q27storagespecification_pack_BNE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_BSW {
+static FUNC_PREFIX void d3q27storagespecification_pack_BSW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_pack_SE {
-static FUNC_PREFIX void d3q27storagespecification_pack_SE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_N {
+static FUNC_PREFIX void d3q27storagespecification_pack_N(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
-         double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
-         double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 5] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 6] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 7] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 8] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_pack_TNW {
-static FUNC_PREFIX void d3q27storagespecification_pack_TNW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_SW {
+static FUNC_PREFIX void d3q27storagespecification_pack_SW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_pack_W {
-static FUNC_PREFIX void d3q27storagespecification_pack_W(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_T {
+static FUNC_PREFIX void d3q27storagespecification_pack_T(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
-   {
-      double * RESTRICT _data_pdfs_src_00_33 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 3*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+   {
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_33_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_33;
-         double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
-         double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
-         double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
-         double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
-         double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
-         double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
-         double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
-         double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2] = _data_pdfs_src_00_33_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 1] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 3] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 4] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 5] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 6] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 7] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 8] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 5*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 5] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 6] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 7] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 8] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_pack_TE {
-static FUNC_PREFIX void d3q27storagespecification_pack_TE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_B {
+static FUNC_PREFIX void d3q27storagespecification_pack_B(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
-         double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
-         double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 6*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 5] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 6] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 7] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 8] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_pack_N {
-static FUNC_PREFIX void d3q27storagespecification_pack_N(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_TNW {
+static FUNC_PREFIX void d3q27storagespecification_pack_TNW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
-   {
-      double * RESTRICT _data_pdfs_src_00_31 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+   {
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_31_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_31;
-         double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
-         double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
-         double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
-         double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
-         double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
-         double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
-         double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
-         double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2] = _data_pdfs_src_00_31_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 1] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 3] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 4] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 5] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 6] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 7] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 8] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_pack_BSW {
-static FUNC_PREFIX void d3q27storagespecification_pack_BSW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_W {
+static FUNC_PREFIX void d3q27storagespecification_pack_W(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 3*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 5] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 6] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 7] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 8] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3];
          }
       }
    }
@@ -652,165 +363,123 @@ static FUNC_PREFIX void d3q27storagespecification_pack_BSW(double * RESTRICT  _d
 namespace internal_d3q27storagespecification_pack_TSW {
 static FUNC_PREFIX void d3q27storagespecification_pack_TSW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_pack_BE {
-static FUNC_PREFIX void d3q27storagespecification_pack_BE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_E {
+static FUNC_PREFIX void d3q27storagespecification_pack_E(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
-         double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
-         double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 4*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 5] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 6] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 7] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 8] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_pack_B {
-static FUNC_PREFIX void d3q27storagespecification_pack_B(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_BSE {
+static FUNC_PREFIX void d3q27storagespecification_pack_BSE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
-   {
-      double * RESTRICT _data_pdfs_src_00_36 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 6*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+   {
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_36_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_36;
-         double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
-         double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
-         double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
-         double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
-         double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
-         double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
-         double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
-         double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2] = _data_pdfs_src_00_36_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 1] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 3] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 4] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 5] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 6] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 7] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 8] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_pack_TNE {
-static FUNC_PREFIX void d3q27storagespecification_pack_TNE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_TN {
+static FUNC_PREFIX void d3q27storagespecification_pack_TN(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_pack_TS {
-static FUNC_PREFIX void d3q27storagespecification_pack_TS(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_BNW {
+static FUNC_PREFIX void d3q27storagespecification_pack_BNW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
-         double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
-         double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_pack_TN {
-static FUNC_PREFIX void d3q27storagespecification_pack_TN(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_TS {
+static FUNC_PREFIX void d3q27storagespecification_pack_TS(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
-         double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
-         double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_pack_BNW {
-static FUNC_PREFIX void d3q27storagespecification_pack_BNW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_NW {
+static FUNC_PREFIX void d3q27storagespecification_pack_NW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3];
          }
       }
    }
@@ -820,105 +489,71 @@ static FUNC_PREFIX void d3q27storagespecification_pack_BNW(double * RESTRICT  _d
 namespace internal_d3q27storagespecification_pack_TW {
 static FUNC_PREFIX void d3q27storagespecification_pack_TW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
-         double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
-         double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_pack_BSE {
-static FUNC_PREFIX void d3q27storagespecification_pack_BSE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_S {
+static FUNC_PREFIX void d3q27storagespecification_pack_S(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 5] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 6] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 7] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3];
+            _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 8] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_pack_NW {
-static FUNC_PREFIX void d3q27storagespecification_pack_NW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_TNE {
+static FUNC_PREFIX void d3q27storagespecification_pack_TNE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
-         double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
-         double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_pack_S {
-static FUNC_PREFIX void d3q27storagespecification_pack_S(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_TSE {
+static FUNC_PREFIX void d3q27storagespecification_pack_TSE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
-   {
-      double * RESTRICT _data_pdfs_src_00_32 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+   {
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_32_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_32;
-         double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
-         double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
-         double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
-         double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
-         double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
-         double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
-         double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
-         double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2] = _data_pdfs_src_00_32_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 1] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 3] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 4] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 5] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 6] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 7] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 8] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3];
          }
       }
    }
@@ -928,1693 +563,1061 @@ static FUNC_PREFIX void d3q27storagespecification_pack_S(double * RESTRICT  _dat
 namespace internal_d3q27storagespecification_pack_BS {
 static FUNC_PREFIX void d3q27storagespecification_pack_BS(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
-         double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
-         double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_pack_TSE {
-static FUNC_PREFIX void d3q27storagespecification_pack_TSE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_TE {
+static FUNC_PREFIX void d3q27storagespecification_pack_TE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_pack_SW {
-static FUNC_PREFIX void d3q27storagespecification_pack_SW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_BN {
+static FUNC_PREFIX void d3q27storagespecification_pack_BN(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
-         double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
-         double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_pack_BW {
-static FUNC_PREFIX void d3q27storagespecification_pack_BW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_NE {
+static FUNC_PREFIX void d3q27storagespecification_pack_NE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
-         double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
-         double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3];
+            _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_pack_E {
-static FUNC_PREFIX void d3q27storagespecification_pack_E(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_BNE {
+static FUNC_PREFIX void d3q27storagespecification_pack_BNE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
-   {
-      double * RESTRICT _data_pdfs_src_00_34 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 4*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
-      double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+   {
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_src_00_34_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_34;
-         double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
-         double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
-         double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
-         double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
-         double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
-         double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
-         double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
-         double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
          {
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2] = _data_pdfs_src_00_34_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 1] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 3] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 4] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 5] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 6] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 7] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
-            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 8] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_unpack_TSE {
-static FUNC_PREFIX void d3q27storagespecification_unpack_TSE(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+   namespace internal_d3q27storagespecification_unpack_SE {
+static FUNC_PREFIX void d3q27storagespecification_unpack_SE(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_unpack_T {
-static FUNC_PREFIX void d3q27storagespecification_unpack_T(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
-{
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
-   {
-      double * RESTRICT  _data_pdfs_dst_00_36 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 6*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
+namespace internal_d3q27storagespecification_unpack_BS {
+static FUNC_PREFIX void d3q27storagespecification_unpack_BS(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+   {
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_36_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_36;
-         double * RESTRICT  _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
-         double * RESTRICT  _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
-         double * RESTRICT  _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
-         double * RESTRICT  _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
-         double * RESTRICT  _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
-         double * RESTRICT  _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
-         double * RESTRICT  _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
-         double * RESTRICT  _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_36_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2];
-            _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 1];
-            _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 2];
-            _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 3];
-            _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 4];
-            _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 5];
-            _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 6];
-            _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 7];
-            _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 8];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_unpack_TN {
-static FUNC_PREFIX void d3q27storagespecification_unpack_TN(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_BNW {
+static FUNC_PREFIX void d3q27storagespecification_unpack_BNW(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
-         double * RESTRICT  _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
-         double * RESTRICT  _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2];
-            _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1];
-            _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_unpack_SW {
-static FUNC_PREFIX void d3q27storagespecification_unpack_SW(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_BSW {
+static FUNC_PREFIX void d3q27storagespecification_unpack_BSW(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
-         double * RESTRICT  _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
-         double * RESTRICT  _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2];
-            _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1];
-            _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_unpack_TNE {
-static FUNC_PREFIX void d3q27storagespecification_unpack_TNE(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_W {
+static FUNC_PREFIX void d3q27storagespecification_unpack_W(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 1];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 4];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 5];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 6];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 7];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 8];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_unpack_BN {
-static FUNC_PREFIX void d3q27storagespecification_unpack_BN(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_TN {
+static FUNC_PREFIX void d3q27storagespecification_unpack_TN(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
-         double * RESTRICT  _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
-         double * RESTRICT  _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2];
-            _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1];
-            _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_unpack_W {
-static FUNC_PREFIX void d3q27storagespecification_unpack_W(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
-{
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
-   {
-      double * RESTRICT  _data_pdfs_dst_00_34 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 4*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
+namespace internal_d3q27storagespecification_unpack_E {
+static FUNC_PREFIX void d3q27storagespecification_unpack_E(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+   {
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_34_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_34;
-         double * RESTRICT  _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
-         double * RESTRICT  _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
-         double * RESTRICT  _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
-         double * RESTRICT  _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
-         double * RESTRICT  _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
-         double * RESTRICT  _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
-         double * RESTRICT  _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
-         double * RESTRICT  _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_34_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2];
-            _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 1];
-            _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 2];
-            _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 3];
-            _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 4];
-            _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 5];
-            _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 6];
-            _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 7];
-            _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 8];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 1];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 4];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 5];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 6];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 7];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 8];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_unpack_E {
-static FUNC_PREFIX void d3q27storagespecification_unpack_E(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
-{
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
-   {
-      double * RESTRICT  _data_pdfs_dst_00_33 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 3*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
+namespace internal_d3q27storagespecification_unpack_TS {
+static FUNC_PREFIX void d3q27storagespecification_unpack_TS(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+   {
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_33_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_33;
-         double * RESTRICT  _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
-         double * RESTRICT  _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
-         double * RESTRICT  _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
-         double * RESTRICT  _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
-         double * RESTRICT  _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
-         double * RESTRICT  _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
-         double * RESTRICT  _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
-         double * RESTRICT  _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_33_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2];
-            _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 1];
-            _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 2];
-            _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 3];
-            _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 4];
-            _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 5];
-            _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 6];
-            _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 7];
-            _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 8];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_unpack_BNE {
-static FUNC_PREFIX void d3q27storagespecification_unpack_BNE(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_TW {
+static FUNC_PREFIX void d3q27storagespecification_unpack_TW(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_unpack_TNW {
-static FUNC_PREFIX void d3q27storagespecification_unpack_TNW(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_NE {
+static FUNC_PREFIX void d3q27storagespecification_unpack_NE(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_unpack_BSE {
-static FUNC_PREFIX void d3q27storagespecification_unpack_BSE(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_NW {
+static FUNC_PREFIX void d3q27storagespecification_unpack_NW(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_unpack_BSW {
-static FUNC_PREFIX void d3q27storagespecification_unpack_BSW(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_TE {
+static FUNC_PREFIX void d3q27storagespecification_unpack_TE(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_unpack_SE {
-static FUNC_PREFIX void d3q27storagespecification_unpack_SE(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_S {
+static FUNC_PREFIX void d3q27storagespecification_unpack_S(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
-         double * RESTRICT  _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
-         double * RESTRICT  _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2];
-            _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1];
-            _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 1];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 4];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 5];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 6];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 7];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 8];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_unpack_N {
-static FUNC_PREFIX void d3q27storagespecification_unpack_N(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
-{
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
-   {
-      double * RESTRICT  _data_pdfs_dst_00_32 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 2*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
+namespace internal_d3q27storagespecification_unpack_B {
+static FUNC_PREFIX void d3q27storagespecification_unpack_B(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+   {
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_32_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_32;
-         double * RESTRICT  _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
-         double * RESTRICT  _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
-         double * RESTRICT  _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
-         double * RESTRICT  _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
-         double * RESTRICT  _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
-         double * RESTRICT  _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
-         double * RESTRICT  _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
-         double * RESTRICT  _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_32_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2];
-            _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 1];
-            _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 2];
-            _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 3];
-            _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 4];
-            _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 5];
-            _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 6];
-            _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 7];
-            _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 8];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 1];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 4];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 5];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 6];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 7];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 8];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_unpack_NE {
-static FUNC_PREFIX void d3q27storagespecification_unpack_NE(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_BN {
+static FUNC_PREFIX void d3q27storagespecification_unpack_BN(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
-         double * RESTRICT  _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
-         double * RESTRICT  _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2];
-            _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1];
-            _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_unpack_TE {
-static FUNC_PREFIX void d3q27storagespecification_unpack_TE(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_TSE {
+static FUNC_PREFIX void d3q27storagespecification_unpack_TSE(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
-         double * RESTRICT  _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
-         double * RESTRICT  _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2];
-            _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1];
-            _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_unpack_B {
-static FUNC_PREFIX void d3q27storagespecification_unpack_B(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
-{
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
-   {
-      double * RESTRICT  _data_pdfs_dst_00_35 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 5*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
+namespace internal_d3q27storagespecification_unpack_TNE {
+static FUNC_PREFIX void d3q27storagespecification_unpack_TNE(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+   {
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_35_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_35;
-         double * RESTRICT  _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
-         double * RESTRICT  _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
-         double * RESTRICT  _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
-         double * RESTRICT  _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
-         double * RESTRICT  _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
-         double * RESTRICT  _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
-         double * RESTRICT  _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
-         double * RESTRICT  _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_35_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2];
-            _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 1];
-            _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 2];
-            _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 3];
-            _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 4];
-            _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 5];
-            _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 6];
-            _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 7];
-            _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 8];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_unpack_NW {
-static FUNC_PREFIX void d3q27storagespecification_unpack_NW(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_BW {
+static FUNC_PREFIX void d3q27storagespecification_unpack_BW(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
-         double * RESTRICT  _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
-         double * RESTRICT  _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2];
-            _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1];
-            _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_unpack_S {
-static FUNC_PREFIX void d3q27storagespecification_unpack_S(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
-{
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
-   {
-      double * RESTRICT  _data_pdfs_dst_00_31 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
+namespace internal_d3q27storagespecification_unpack_TNW {
+static FUNC_PREFIX void d3q27storagespecification_unpack_TNW(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+   {
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_31_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_31;
-         double * RESTRICT  _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
-         double * RESTRICT  _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
-         double * RESTRICT  _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
-         double * RESTRICT  _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
-         double * RESTRICT  _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
-         double * RESTRICT  _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
-         double * RESTRICT  _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
-         double * RESTRICT  _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_31_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2];
-            _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 1];
-            _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 2];
-            _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 3];
-            _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 4];
-            _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 5];
-            _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 6];
-            _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 7];
-            _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 8];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_unpack_TSW {
-static FUNC_PREFIX void d3q27storagespecification_unpack_TSW(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_BNE {
+static FUNC_PREFIX void d3q27storagespecification_unpack_BNE(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_unpack_BE {
-static FUNC_PREFIX void d3q27storagespecification_unpack_BE(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_BSE {
+static FUNC_PREFIX void d3q27storagespecification_unpack_BSE(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
-         double * RESTRICT  _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
-         double * RESTRICT  _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2];
-            _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1];
-            _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_unpack_BS {
-static FUNC_PREFIX void d3q27storagespecification_unpack_BS(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_N {
+static FUNC_PREFIX void d3q27storagespecification_unpack_N(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
-         double * RESTRICT  _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
-         double * RESTRICT  _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2];
-            _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1];
-            _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 1];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 4];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 5];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 6];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 7];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 8];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_unpack_BW {
-static FUNC_PREFIX void d3q27storagespecification_unpack_BW(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_BE {
+static FUNC_PREFIX void d3q27storagespecification_unpack_BE(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
-         double * RESTRICT  _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
-         double * RESTRICT  _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2];
-            _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1];
-            _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_unpack_TS {
-static FUNC_PREFIX void d3q27storagespecification_unpack_TS(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_SW {
+static FUNC_PREFIX void d3q27storagespecification_unpack_SW(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
-         double * RESTRICT  _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
-         double * RESTRICT  _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2];
-            _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1];
-            _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_unpack_BNW {
-static FUNC_PREFIX void d3q27storagespecification_unpack_BNW(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_T {
+static FUNC_PREFIX void d3q27storagespecification_unpack_T(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 1];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 4];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 5];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 6];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 7];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 8];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_unpack_TW {
-static FUNC_PREFIX void d3q27storagespecification_unpack_TW(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_TSW {
+static FUNC_PREFIX void d3q27storagespecification_unpack_TSW(double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
-      double * RESTRICT  _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
-         double * RESTRICT  _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
-         double * RESTRICT  _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2];
-            _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1];
-            _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_localCopy_SE {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_SE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+   namespace internal_d3q27storagespecification_localCopy_TNE {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_TNE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
-         double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
-         double * RESTRICT  _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
-         double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
-         double * RESTRICT  _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
-         double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_localCopy_TS {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_TS(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_S {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_S(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
-         double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
-         double * RESTRICT  _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
-         double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
-         double * RESTRICT  _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
-         double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_localCopy_BNW {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_BNW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_E {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_E(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
-         double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 4*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_localCopy_TSW {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_TSW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_TE {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_TE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
-         double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_localCopy_TE {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_TE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_SW {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_SW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
-         double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
-         double * RESTRICT  _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
-         double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
-         double * RESTRICT  _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
-         double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_localCopy_TNE {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_TNE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_TS {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_TS(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
-         double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_localCopy_BS {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_BS(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_BSE {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_BSE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
-         double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
-         double * RESTRICT  _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
-         double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
-         double * RESTRICT  _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
-         double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_localCopy_W {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_W(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_BNE {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_BNE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
-   {
-      double * RESTRICT  _data_pdfs_dst_00_33 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 3*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_33 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 3*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+   {
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_33_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_33;
-         double * RESTRICT _data_pdfs_src_00_33_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_33;
-         double * RESTRICT  _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
-         double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
-         double * RESTRICT  _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
-         double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
-         double * RESTRICT  _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
-         double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
-         double * RESTRICT  _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
-         double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
-         double * RESTRICT  _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
-         double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
-         double * RESTRICT  _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
-         double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
-         double * RESTRICT  _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
-         double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
-         double * RESTRICT  _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
-         double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
-         {
-            _data_pdfs_dst_00_33_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_33_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+         {
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_localCopy_TSE {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_TSE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_TSW {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_TSW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
-         double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_localCopy_NE {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_NE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_SE {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_SE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
-         double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
-         double * RESTRICT  _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
-         double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
-         double * RESTRICT  _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
-         double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_localCopy_B {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_B(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_W {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_W(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
-   {
-      double * RESTRICT  _data_pdfs_dst_00_36 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 6*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_36 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 6*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+   {
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_36_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_36;
-         double * RESTRICT _data_pdfs_src_00_36_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_36;
-         double * RESTRICT  _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
-         double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
-         double * RESTRICT  _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
-         double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
-         double * RESTRICT  _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
-         double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
-         double * RESTRICT  _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
-         double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
-         double * RESTRICT  _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
-         double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
-         double * RESTRICT  _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
-         double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
-         double * RESTRICT  _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
-         double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
-         double * RESTRICT  _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
-         double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
-         {
-            _data_pdfs_dst_00_36_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_36_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+         {
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 3*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_localCopy_TNW {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_TNW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_TSE {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_TSE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
-         double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_localCopy_NW {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_NW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_BW {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_BW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
-         double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
-         double * RESTRICT  _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
-         double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
-         double * RESTRICT  _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
-         double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_localCopy_BN {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_BN(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_TW {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_TW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
-         double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
-         double * RESTRICT  _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
-         double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
-         double * RESTRICT  _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
-         double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_localCopy_TW {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_TW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_B {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_B(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
-         double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
-         double * RESTRICT  _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
-         double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
-         double * RESTRICT  _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
-         double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 6*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_localCopy_BW {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_BW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_N {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_N(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
-         double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
-         double * RESTRICT  _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
-         double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
-         double * RESTRICT  _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
-         double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_localCopy_SW {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_SW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_NW {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_NW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
-         double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
-         double * RESTRICT  _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
-         double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
-         double * RESTRICT  _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
-         double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_localCopy_T {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_T(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_TNW {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_TNW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
-   {
-      double * RESTRICT  _data_pdfs_dst_00_35 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 5*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_35 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 5*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+   {
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_35_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_35;
-         double * RESTRICT _data_pdfs_src_00_35_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_35;
-         double * RESTRICT  _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
-         double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
-         double * RESTRICT  _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
-         double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
-         double * RESTRICT  _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
-         double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
-         double * RESTRICT  _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
-         double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
-         double * RESTRICT  _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
-         double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
-         double * RESTRICT  _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
-         double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
-         double * RESTRICT  _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
-         double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
-         double * RESTRICT  _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
-         double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
-         {
-            _data_pdfs_dst_00_35_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_35_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+         {
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_localCopy_BSW {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_BSW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_NE {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_NE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
-         double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_localCopy_S {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_S(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_BN {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_BN(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
-   {
-      double * RESTRICT  _data_pdfs_dst_00_32 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 2*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_32 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+   {
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_32_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_32;
-         double * RESTRICT _data_pdfs_src_00_32_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_32;
-         double * RESTRICT  _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
-         double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
-         double * RESTRICT  _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
-         double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
-         double * RESTRICT  _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
-         double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
-         double * RESTRICT  _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
-         double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
-         double * RESTRICT  _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
-         double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
-         double * RESTRICT  _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
-         double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
-         double * RESTRICT  _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
-         double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
-         double * RESTRICT  _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
-         double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
-         {
-            _data_pdfs_dst_00_32_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_32_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+         {
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_localCopy_TN {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_TN(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_T {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_T(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
-         double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
-         double * RESTRICT  _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
-         double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
-         double * RESTRICT  _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
-         double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 5*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_localCopy_E {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_E(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_BNW {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_BNW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
-   {
-      double * RESTRICT  _data_pdfs_dst_00_34 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 4*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_34 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 4*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+   {
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_34_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_34;
-         double * RESTRICT _data_pdfs_src_00_34_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_34;
-         double * RESTRICT  _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
-         double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
-         double * RESTRICT  _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
-         double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
-         double * RESTRICT  _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
-         double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
-         double * RESTRICT  _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
-         double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
-         double * RESTRICT  _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
-         double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
-         double * RESTRICT  _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
-         double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
-         double * RESTRICT  _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
-         double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
-         double * RESTRICT  _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
-         double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
-         {
-            _data_pdfs_dst_00_34_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_34_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+         {
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_localCopy_N {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_N(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_BS {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_BS(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
-   {
-      double * RESTRICT  _data_pdfs_dst_00_31 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_31 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+   {
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_31_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_31;
-         double * RESTRICT _data_pdfs_src_00_31_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_31;
-         double * RESTRICT  _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
-         double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
-         double * RESTRICT  _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
-         double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
-         double * RESTRICT  _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
-         double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
-         double * RESTRICT  _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
-         double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
-         double * RESTRICT  _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
-         double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
-         double * RESTRICT  _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
-         double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
-         double * RESTRICT  _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
-         double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
-         double * RESTRICT  _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
-         double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
-         {
-            _data_pdfs_dst_00_31_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_31_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+         {
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_localCopy_BSE {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_BSE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_BSW {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_BSW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
-         double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_localCopy_BE {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_BE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_TN {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_TN(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
-      double * RESTRICT  _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
-         double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
-         double * RESTRICT  _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
-         double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
-         double * RESTRICT  _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
-         double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
-            _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3];
          }
       }
    }
 }
 }
 
-namespace internal_d3q27storagespecification_localCopy_BNE {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_BNE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_BE {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_BE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
 {
-   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
-      double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
-         double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
-         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
          {
-            _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3];
+            _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3];
          }
       }
    }
@@ -2622,15 +1625,12 @@ static FUNC_PREFIX void d3q27storagespecification_localCopy_BNE(double * RESTRIC
 }
 
 
+   
 
-
-/*************************************************************************************
+   /*************************************************************************************
  *                                 Kernel Wrappers
 *************************************************************************************/
 
-namespace walberla {
-namespace lbm {
-
    void D3Q27StorageSpecification::PackKernels::packAll(PdfField_T * pdfs_src, CellInterval & ci, unsigned char * outBuffer) const
    {
       double * buffer = reinterpret_cast<double*>(outBuffer);
diff --git a/src/lbm_generated/storage_specification/D3Q27StorageSpecification.h b/src/lbm_generated/storage_specification/D3Q27StorageSpecification.h
index 42599878544c3e4632603b7141074e9196b2153a..c765ef8a54e7ea8172efd8b429101c67e711d215 100644
--- a/src/lbm_generated/storage_specification/D3Q27StorageSpecification.h
+++ b/src/lbm_generated/storage_specification/D3Q27StorageSpecification.h
@@ -68,9 +68,35 @@ class D3Q27StorageSpecification
    // If true the background deviation (rho_0 = 1) is subtracted for the collision step.
    static const bool zeroCenteredPDFs = true;
    // Lattice weights
-   static constexpr double w[27] = { 0.296296296296296,0.0740740740740741,0.0740740740740741,0.0740740740740741,0.0740740740740741,0.0740740740740741,0.0740740740740741,0.0185185185185185,0.0185185185185185,0.0185185185185185,0.0185185185185185,0.0185185185185185,0.0185185185185185,0.0185185185185185,0.0185185185185185,0.0185185185185185,0.0185185185185185,0.0185185185185185,0.0185185185185185,0.00462962962962963,0.00462962962962963,0.00462962962962963,0.00462962962962963,0.00462962962962963,0.00462962962962963,0.00462962962962963,0.00462962962962963 };
+   static constexpr double w[27] = { double(0.296296296296296), double(0.0740740740740741), double(0.0740740740740741), double(0.0740740740740741), double(0.0740740740740741), double(0.0740740740740741), double(0.0740740740740741), double(0.0185185185185185), double(0.0185185185185185), double(0.0185185185185185), double(0.0185185185185185), double(0.0185185185185185), double(0.0185185185185185), double(0.0185185185185185), double(0.0185185185185185), double(0.0185185185185185), double(0.0185185185185185), double(0.0185185185185185), double(0.0185185185185185), double(0.00462962962962963), double(0.00462962962962963), double(0.00462962962962963), double(0.00462962962962963), double(0.00462962962962963), double(0.00462962962962963), double(0.00462962962962963), double(0.00462962962962963) };
    // Inverse lattice weights
-   static constexpr double wInv[27] = { 3.37500000000000,13.5000000000000,13.5000000000000,13.5000000000000,13.5000000000000,13.5000000000000,13.5000000000000,54.0000000000000,54.0000000000000,54.0000000000000,54.0000000000000,54.0000000000000,54.0000000000000,54.0000000000000,54.0000000000000,54.0000000000000,54.0000000000000,54.0000000000000,54.0000000000000,216.000000000000,216.000000000000,216.000000000000,216.000000000000,216.000000000000,216.000000000000,216.000000000000,216.000000000000 };
+   static constexpr double wInv[27] = { double(3.37500000000000), double(13.5000000000000), double(13.5000000000000), double(13.5000000000000), double(13.5000000000000), double(13.5000000000000), double(13.5000000000000), double(54.0000000000000), double(54.0000000000000), double(54.0000000000000), double(54.0000000000000), double(54.0000000000000), double(54.0000000000000), double(54.0000000000000), double(54.0000000000000), double(54.0000000000000), double(54.0000000000000), double(54.0000000000000), double(54.0000000000000), double(216.000000000000), double(216.000000000000), double(216.000000000000), double(216.000000000000), double(216.000000000000), double(216.000000000000), double(216.000000000000), double(216.000000000000) };
+
+   struct AccessorEVEN
+   {
+      static constexpr cell_idx_t readX[27] = { 0, 0, 0, 1, -1, 0, 0, 1, -1, 1, -1, 0, 0, 1, -1, 0, 0, 1, -1, -1, 1, -1, 1, -1, 1, -1, 1 };
+      static constexpr cell_idx_t readY[27] = { 0, -1, 1, 0, 0, 0, 0, -1, -1, 1, 1, -1, 1, 0, 0, -1, 1, 0, 0, -1, -1, 1, 1, -1, -1, 1, 1 };
+      static constexpr cell_idx_t readZ[27] = { 0, 0, 0, 0, 0, -1, 1, 0, 0, 0, 0, -1, -1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1, 1, 1, 1, 1 };
+      static constexpr cell_idx_t readD[27] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26 };
+
+      static constexpr cell_idx_t writeX[27] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+      static constexpr cell_idx_t writeY[27] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+      static constexpr cell_idx_t writeZ[27] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+      static constexpr cell_idx_t writeD[27] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26 };
+   };
+
+   struct AccessorODD
+   {
+      static constexpr cell_idx_t readX[27] = { 0, 0, 0, 1, -1, 0, 0, 1, -1, 1, -1, 0, 0, 1, -1, 0, 0, 1, -1, -1, 1, -1, 1, -1, 1, -1, 1 };
+      static constexpr cell_idx_t readY[27] = { 0, -1, 1, 0, 0, 0, 0, -1, -1, 1, 1, -1, 1, 0, 0, -1, 1, 0, 0, -1, -1, 1, 1, -1, -1, 1, 1 };
+      static constexpr cell_idx_t readZ[27] = { 0, 0, 0, 0, 0, -1, 1, 0, 0, 0, 0, -1, -1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1, 1, 1, 1, 1 };
+      static constexpr cell_idx_t readD[27] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26 };
+
+      static constexpr cell_idx_t writeX[27] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+      static constexpr cell_idx_t writeY[27] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+      static constexpr cell_idx_t writeZ[27] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+      static constexpr cell_idx_t writeD[27] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26 };
+   };
 
    // Compute kernels to pack and unpack MPI buffers
    class PackKernels {
@@ -84,8 +110,8 @@ class D3Q27StorageSpecification
       static const bool inplace = false;
 
       /**
-       * Packs all pdfs from the given cell interval to the send buffer.
-       * */
+      * Packs all pdfs from the given cell interval to the send buffer.
+      * */
       void packAll(PdfField_T * pdfs_src, CellInterval & ci, unsigned char * outBuffer) const;
 
       /**
@@ -124,7 +150,7 @@ class D3Q27StorageSpecification
        * @return    The required size of the buffer, in bytes
        * */
       uint_t size (CellInterval & ci, stencil::Direction dir) const {
-         return ci.numCells() * sizes[dir] * sizeof(value_type);
+         return ci.numCells() * sizes[dir] * uint_c(sizeof(value_type));
       }
 
       /**
@@ -134,7 +160,7 @@ class D3Q27StorageSpecification
        * @return    The required size of the buffer, in bytes
        * */
       uint_t size (CellInterval & ci) const {
-         return ci.numCells() * 27 * sizeof(value_type);
+         return ci.numCells() * 27 * uint_c(sizeof(value_type));
       }
 
       
@@ -143,6 +169,8 @@ class D3Q27StorageSpecification
       const uint_t sizes[27] { 0, 9, 9, 9, 9, 9, 9, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1 };
    };
 
+   using value_type = PackKernels::value_type;
+
 };
 
 }} //lbm/walberla
\ No newline at end of file
diff --git a/src/lbm_generated/storage_specification/storage_specification_generation_script.py b/src/lbm_generated/storage_specification/storage_specification_generation_script.py
index d7432ee70d6233edbd4c408199f1d89ae4fe1e6d..42dcac5f4b72e0a7e99dd642c8c77a61a65a5705 100644
--- a/src/lbm_generated/storage_specification/storage_specification_generation_script.py
+++ b/src/lbm_generated/storage_specification/storage_specification_generation_script.py
@@ -3,7 +3,7 @@ import sympy as sp
 from pystencils import Target
 
 from lbmpy.creationfunctions import create_lb_method
-from lbmpy import LBMConfig, Stencil, Method, LBStencil
+from lbmpy import LBMConfig, LBMOptimisation, Stencil, Method, LBStencil
 from pystencils_walberla import ManualCodeGenerationContext, generate_info_header
 from lbmpy_walberla.storage_specification import generate_lbm_storage_specification
 
@@ -22,11 +22,12 @@ with ManualCodeGenerationContext(openmp=False, optimize_for_localhost=False,
 
         lbm_config = LBMConfig(stencil=stencil, method=method, relaxation_rate=relaxation_rate,
                                streaming_pattern=streaming_pattern)
+        lbm_opt = LBMOptimisation()
 
         lb_method = create_lb_method(lbm_config=lbm_config)
 
         storage_spec_name = f'{stencil.name}StorageSpecification'
-        generate_lbm_storage_specification(ctx, storage_spec_name, lb_method, lbm_config,
+        generate_lbm_storage_specification(ctx, storage_spec_name, lb_method, lbm_config, lbm_opt,
                                            nonuniform=nonuniform, target=target, data_type=data_type)
 
         ctx.write_all_files()
diff --git a/src/lbm_generated/sweep_collection/D3Q19SRT.cpp b/src/lbm_generated/sweep_collection/D3Q19SRT.cpp
index b2ed08360d0699e51e3e47e1906727ef739a2e17..02cf3aaf466412ace818cb43b780d0f626ef4282 100644
--- a/src/lbm_generated/sweep_collection/D3Q19SRT.cpp
+++ b/src/lbm_generated/sweep_collection/D3Q19SRT.cpp
@@ -41,119 +41,43 @@ static FUNC_PREFIX void d3q19srt_kernel_streamCollide(double * RESTRICT const _d
 {
    for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_2m1_314 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_311 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_313 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_312 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_35 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
-      double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_tmp_20_30 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2;
-      double * RESTRICT  _data_pdfs_tmp_20_31 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_32 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_33 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_34 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_35 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_36 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_37 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_38 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_39 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_310 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_311 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_312 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_313 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_314 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_315 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_316 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_317 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_318 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_2m1_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_314;
-         double * RESTRICT _data_pdfs_20_310_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_310;
-         double * RESTRICT _data_pdfs_20_38_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_38;
-         double * RESTRICT _data_pdfs_21_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_318;
-         double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
-         double * RESTRICT _data_pdfs_2m1_311_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_311;
-         double * RESTRICT _data_pdfs_20_37_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_37;
-         double * RESTRICT _data_pdfs_20_31_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_31;
-         double * RESTRICT _data_pdfs_21_315_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_315;
-         double * RESTRICT _data_pdfs_2m1_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_313;
-         double * RESTRICT _data_pdfs_2m1_312_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_312;
-         double * RESTRICT _data_pdfs_2m1_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_35;
-         double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
-         double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
-         double * RESTRICT _data_pdfs_20_39_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_39;
-         double * RESTRICT _data_pdfs_20_32_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_32;
-         double * RESTRICT _data_pdfs_21_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_317;
-         double * RESTRICT _data_pdfs_21_316_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_316;
-         double * RESTRICT _data_pdfs_21_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_36;
-         double * RESTRICT  _data_pdfs_tmp_20_30_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_30;
-         double * RESTRICT  _data_pdfs_tmp_20_31_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_31;
-         double * RESTRICT  _data_pdfs_tmp_20_32_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_32;
-         double * RESTRICT  _data_pdfs_tmp_20_33_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_33;
-         double * RESTRICT  _data_pdfs_tmp_20_34_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_34;
-         double * RESTRICT  _data_pdfs_tmp_20_35_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_35;
-         double * RESTRICT  _data_pdfs_tmp_20_36_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_36;
-         double * RESTRICT  _data_pdfs_tmp_20_37_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_37;
-         double * RESTRICT  _data_pdfs_tmp_20_38_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_38;
-         double * RESTRICT  _data_pdfs_tmp_20_39_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_39;
-         double * RESTRICT  _data_pdfs_tmp_20_310_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_310;
-         double * RESTRICT  _data_pdfs_tmp_20_311_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_311;
-         double * RESTRICT  _data_pdfs_tmp_20_312_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_312;
-         double * RESTRICT  _data_pdfs_tmp_20_313_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_313;
-         double * RESTRICT  _data_pdfs_tmp_20_314_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_314;
-         double * RESTRICT  _data_pdfs_tmp_20_315_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_315;
-         double * RESTRICT  _data_pdfs_tmp_20_316_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_316;
-         double * RESTRICT  _data_pdfs_tmp_20_317_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_317;
-         double * RESTRICT  _data_pdfs_tmp_20_318_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_318;
          for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1)
          {
-            const double vel0Term = _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double vel1Term = _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0];
-            const double vel2Term = _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0];
-            const double delta_rho = vel0Term + vel1Term + vel2Term + _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0] + _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] + _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0];
-            const double u_0 = vel0Term - 1.0*_data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            const double u_1 = vel1Term - 1.0*_data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_20_32_11[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0] + _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double u_2 = vel2Term - 1.0*_data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_21_36_10[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double u0Mu1 = u_0 + u_1*-1.0;
+            const double vel0Term = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3];
+            const double vel1Term = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3];
+            const double vel2Term = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3];
+            const double delta_rho = vel0Term + vel1Term + vel2Term + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2];
+            const double u_0 = vel0Term - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3];
+            const double u_1 = vel1Term - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3];
+            const double u_2 = vel2Term - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3];
+            const double u0Mu1 = u_0 - u_1;
             const double u0Pu1 = u_0 + u_1;
             const double u1Pu2 = u_1 + u_2;
-            const double u1Mu2 = u_1 + u_2*-1.0;
-            const double u0Mu2 = u_0 + u_2*-1.0;
+            const double u1Mu2 = u_1 - u_2;
+            const double u0Mu2 = u_0 - u_2;
             const double u0Pu2 = u_0 + u_2;
-            const double f_eq_common = delta_rho - 1.0*(u_0*u_0) - 1.0*(u_1*u_1) - 1.0*(u_2*u_2);
-            _data_pdfs_tmp_20_30_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.33333333333333331 - 1.0*_data_pdfs_20_30_10[_stride_pdfs_0*ctr_0]) + _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_31_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_1*0.16666666666666666 - 1.0*_data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_1*u_1)) + _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_32_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_1*-0.16666666666666666 - 1.0*_data_pdfs_20_32_11[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_1*u_1)) + _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_33_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_0*-0.16666666666666666 - 1.0*_data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0)) + _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            _data_pdfs_tmp_20_34_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_0*0.16666666666666666 - 1.0*_data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0)) + _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            _data_pdfs_tmp_20_35_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_2*0.16666666666666666 - 1.0*_data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_2*u_2)) + _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_36_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_2*-0.16666666666666666 - 1.0*_data_pdfs_21_36_10[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_2*u_2)) + _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_37_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu1*-0.083333333333333329 - 1.0*_data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)) + _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            _data_pdfs_tmp_20_38_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu1*0.083333333333333329 - 1.0*_data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1)) + _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            _data_pdfs_tmp_20_39_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu1*-0.083333333333333329 - 1.0*_data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1)) + _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            _data_pdfs_tmp_20_310_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu1*0.083333333333333329 - 1.0*_data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)) + _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            _data_pdfs_tmp_20_311_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Pu2*0.083333333333333329 - 1.0*_data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0] + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)) + _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_312_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Mu2*-0.083333333333333329 - 1.0*_data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0] + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)) + _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_313_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu2*-0.083333333333333329 - 1.0*_data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)) + _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            _data_pdfs_tmp_20_314_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu2*0.083333333333333329 - 1.0*_data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)) + _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            _data_pdfs_tmp_20_315_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Mu2*0.083333333333333329 - 1.0*_data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0] + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)) + _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_316_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Pu2*-0.083333333333333329 - 1.0*_data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)) + _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_317_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu2*-0.083333333333333329 - 1.0*_data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)) + _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            _data_pdfs_tmp_20_318_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu2*0.083333333333333329 - 1.0*_data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)) + _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            const double f_eq_common = delta_rho - u_0*u_0 - u_1*u_1 - u_2*u_2;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2] = omega*(f_eq_common*0.33333333333333331 - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_1*0.16666666666666666 + 0.33333333333333331*(u_1*u_1) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_1*-0.16666666666666666 + 0.33333333333333331*(u_1*u_1) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_0*-0.16666666666666666 + 0.33333333333333331*(u_0*u_0) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_0*0.16666666666666666 + 0.33333333333333331*(u_0*u_0) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_2*0.16666666666666666 + 0.33333333333333331*(u_2*u_2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_2*-0.16666666666666666 + 0.33333333333333331*(u_2*u_2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu1*-0.083333333333333329 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu1*0.083333333333333329 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu1*-0.083333333333333329 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu1*0.083333333333333329 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Pu2*0.083333333333333329 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Mu2*-0.083333333333333329 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu2*-0.083333333333333329 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu2*0.083333333333333329 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Mu2*0.083333333333333329 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Pu2*-0.083333333333333329 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu2*-0.083333333333333329 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu2*0.083333333333333329 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3];
          }
       }
    }
@@ -166,100 +90,62 @@ static FUNC_PREFIX void d3q19srt_kernel_collide(double * RESTRICT  _data_pdfs, i
 {
    for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
-      double * RESTRICT  _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_20_315_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_315;
-         double * RESTRICT  _data_pdfs_20_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_318;
-         double * RESTRICT  _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
-         double * RESTRICT  _data_pdfs_20_31_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_31;
-         double * RESTRICT  _data_pdfs_20_37_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_37;
-         double * RESTRICT  _data_pdfs_20_311_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_311;
-         double * RESTRICT  _data_pdfs_20_32_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_32;
-         double * RESTRICT  _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
-         double * RESTRICT  _data_pdfs_20_38_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_38;
-         double * RESTRICT  _data_pdfs_20_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_36;
-         double * RESTRICT  _data_pdfs_20_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_313;
-         double * RESTRICT  _data_pdfs_20_39_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_39;
-         double * RESTRICT  _data_pdfs_20_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_35;
-         double * RESTRICT  _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
-         double * RESTRICT  _data_pdfs_20_310_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_310;
-         double * RESTRICT  _data_pdfs_20_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_317;
-         double * RESTRICT  _data_pdfs_20_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_314;
-         double * RESTRICT  _data_pdfs_20_316_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_316;
-         double * RESTRICT  _data_pdfs_20_312_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_312;
          for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1)
          {
-            const double xi_1 = _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0];
-            const double xi_2 = _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0];
-            const double xi_3 = _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0];
-            const double xi_4 = _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0];
-            const double xi_5 = _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0];
-            const double xi_6 = _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0];
-            const double xi_7 = _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0];
-            const double xi_8 = _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0];
-            const double xi_9 = _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0];
-            const double xi_10 = _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0];
-            const double xi_11 = _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0];
-            const double xi_12 = _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0];
-            const double xi_13 = _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0];
-            const double xi_14 = _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0];
-            const double xi_15 = _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0];
-            const double xi_16 = _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0];
-            const double xi_17 = _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0];
-            const double xi_18 = _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0];
-            const double xi_19 = _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0];
-            const double vel0Term = xi_15 + xi_17 + xi_2 + xi_8 + xi_9;
-            const double vel1Term = xi_1 + xi_4 + xi_5 + xi_6;
-            const double vel2Term = xi_11 + xi_13 + xi_19;
-            const double delta_rho = vel0Term + vel1Term + vel2Term + xi_10 + xi_12 + xi_14 + xi_16 + xi_18 + xi_3 + xi_7;
-            const double u_0 = vel0Term + xi_11*-1.0 + xi_12*-1.0 + xi_14*-1.0 + xi_16*-1.0 + xi_5*-1.0;
-            const double u_1 = vel1Term + xi_12*-1.0 + xi_15*-1.0 + xi_18*-1.0 + xi_19*-1.0 + xi_7*-1.0 + xi_9;
-            const double u_2 = vel2Term + xi_1*-1.0 + xi_10*-1.0 + xi_16*-1.0 + xi_17 + xi_18*-1.0 + xi_2*-1.0 + xi_6;
-            const double u0Mu1 = u_0 + u_1*-1.0;
+            const double xi_1 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3];
+            const double xi_2 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3];
+            const double xi_3 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3];
+            const double xi_4 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3];
+            const double xi_5 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2];
+            const double xi_6 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3];
+            const double xi_7 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3];
+            const double xi_8 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3];
+            const double xi_9 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3];
+            const double xi_10 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3];
+            const double xi_11 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3];
+            const double xi_12 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3];
+            const double xi_13 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3];
+            const double xi_14 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3];
+            const double xi_15 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3];
+            const double xi_16 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3];
+            const double xi_17 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3];
+            const double xi_18 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3];
+            const double xi_19 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3];
+            const double vel0Term = xi_15 + xi_3 + xi_4 + xi_6 + xi_7;
+            const double vel1Term = xi_12 + xi_14 + xi_16 + xi_2;
+            const double vel2Term = xi_1 + xi_18 + xi_8;
+            const double delta_rho = vel0Term + vel1Term + vel2Term + xi_10 + xi_11 + xi_13 + xi_17 + xi_19 + xi_5 + xi_9;
+            const double u_0 = vel0Term - xi_1 - xi_17 - xi_19 - xi_2 - xi_9;
+            const double u_1 = vel1Term - xi_10 - xi_13 - xi_15 - xi_17 + xi_3 - xi_8;
+            const double u_2 = vel2Term - xi_11 - xi_13 + xi_14 - xi_16 - xi_4 + xi_6 - xi_9;
+            const double u0Mu1 = u_0 - u_1;
             const double u0Pu1 = u_0 + u_1;
             const double u1Pu2 = u_1 + u_2;
-            const double u1Mu2 = u_1 + u_2*-1.0;
-            const double u0Mu2 = u_0 + u_2*-1.0;
+            const double u1Mu2 = u_1 - u_2;
+            const double u0Mu2 = u_0 - u_2;
             const double u0Pu2 = u_0 + u_2;
-            const double f_eq_common = delta_rho - 1.0*(u_0*u_0) - 1.0*(u_1*u_1) - 1.0*(u_2*u_2);
-            _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.33333333333333331 + xi_3*-1.0) + xi_3;
-            _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_1*0.16666666666666666 + xi_4*-1.0 + 0.33333333333333331*(u_1*u_1)) + xi_4;
-            _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_1*-0.16666666666666666 + xi_7*-1.0 + 0.33333333333333331*(u_1*u_1)) + xi_7;
-            _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_0*-0.16666666666666666 + xi_14*-1.0 + 0.33333333333333331*(u_0*u_0)) + xi_14;
-            _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_0*0.16666666666666666 + xi_8*-1.0 + 0.33333333333333331*(u_0*u_0)) + xi_8;
-            _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_2*0.16666666666666666 + xi_13*-1.0 + 0.33333333333333331*(u_2*u_2)) + xi_13;
-            _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_2*-0.16666666666666666 + xi_10*-1.0 + 0.33333333333333331*(u_2*u_2)) + xi_10;
-            _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu1*-0.083333333333333329 + xi_5*-1.0 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)) + xi_5;
-            _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu1*0.083333333333333329 + xi_9*-1.0 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1)) + xi_9;
-            _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu1*-0.083333333333333329 + xi_12*-1.0 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1)) + xi_12;
-            _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu1*0.083333333333333329 + xi_15*-1.0 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)) + xi_15;
-            _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Pu2*0.083333333333333329 + xi_6*-1.0 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)) + xi_6;
-            _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Mu2*-0.083333333333333329 + xi_19*-1.0 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)) + xi_19;
-            _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu2*-0.083333333333333329 + xi_11*-1.0 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)) + xi_11;
-            _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu2*0.083333333333333329 + xi_17*-1.0 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)) + xi_17;
-            _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Mu2*0.083333333333333329 + xi_1*-1.0 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)) + xi_1;
-            _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Pu2*-0.083333333333333329 + xi_18*-1.0 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)) + xi_18;
-            _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu2*-0.083333333333333329 + xi_16*-1.0 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)) + xi_16;
-            _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu2*0.083333333333333329 + xi_2*-1.0 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)) + xi_2;
+            const double f_eq_common = delta_rho - u_0*u_0 - u_1*u_1 - u_2*u_2;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2] = omega*(f_eq_common*0.33333333333333331 - xi_5) + xi_5;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_1*0.16666666666666666 - xi_12 + 0.33333333333333331*(u_1*u_1)) + xi_12;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_1*-0.16666666666666666 - xi_10 + 0.33333333333333331*(u_1*u_1)) + xi_10;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_0*-0.16666666666666666 - xi_19 + 0.33333333333333331*(u_0*u_0)) + xi_19;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_0*0.16666666666666666 - xi_7 + 0.33333333333333331*(u_0*u_0)) + xi_7;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_2*0.16666666666666666 - xi_18 + 0.33333333333333331*(u_2*u_2)) + xi_18;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_2*-0.16666666666666666 - xi_11 + 0.33333333333333331*(u_2*u_2)) + xi_11;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu1*-0.083333333333333329 - xi_2 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)) + xi_2;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu1*0.083333333333333329 - xi_3 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1)) + xi_3;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu1*-0.083333333333333329 - xi_17 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1)) + xi_17;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu1*0.083333333333333329 - xi_15 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)) + xi_15;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Pu2*0.083333333333333329 - xi_14 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)) + xi_14;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Mu2*-0.083333333333333329 - xi_8 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)) + xi_8;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu2*-0.083333333333333329 - xi_1 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)) + xi_1;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu2*0.083333333333333329 - xi_6 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)) + xi_6;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Mu2*0.083333333333333329 - xi_16 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)) + xi_16;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Pu2*-0.083333333333333329 - xi_13 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)) + xi_13;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu2*-0.083333333333333329 - xi_9 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)) + xi_9;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu2*0.083333333333333329 - xi_4 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)) + xi_4;
          }
       }
    }
@@ -272,124 +158,48 @@ static FUNC_PREFIX void d3q19srt_kernel_stream(double * RESTRICT const _data_pdf
 {
    for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
-      double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_35 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_311 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_312 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_313 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_314 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_tmp_20_30 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2;
-      double * RESTRICT  _data_pdfs_tmp_20_31 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_32 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_33 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_34 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_35 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_36 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_37 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_38 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_39 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_310 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_311 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_312 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_313 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_314 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_315 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_316 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_317 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_318 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
-         double * RESTRICT _data_pdfs_20_31_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_31;
-         double * RESTRICT _data_pdfs_20_32_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_32;
-         double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
-         double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
-         double * RESTRICT _data_pdfs_2m1_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_35;
-         double * RESTRICT _data_pdfs_21_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_36;
-         double * RESTRICT _data_pdfs_20_37_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_37;
-         double * RESTRICT _data_pdfs_20_38_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_38;
-         double * RESTRICT _data_pdfs_20_39_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_39;
-         double * RESTRICT _data_pdfs_20_310_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_310;
-         double * RESTRICT _data_pdfs_2m1_311_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_311;
-         double * RESTRICT _data_pdfs_2m1_312_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_312;
-         double * RESTRICT _data_pdfs_2m1_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_313;
-         double * RESTRICT _data_pdfs_2m1_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_314;
-         double * RESTRICT _data_pdfs_21_315_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_315;
-         double * RESTRICT _data_pdfs_21_316_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_316;
-         double * RESTRICT _data_pdfs_21_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_317;
-         double * RESTRICT _data_pdfs_21_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_318;
-         double * RESTRICT  _data_pdfs_tmp_20_30_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_30;
-         double * RESTRICT  _data_pdfs_tmp_20_31_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_31;
-         double * RESTRICT  _data_pdfs_tmp_20_32_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_32;
-         double * RESTRICT  _data_pdfs_tmp_20_33_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_33;
-         double * RESTRICT  _data_pdfs_tmp_20_34_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_34;
-         double * RESTRICT  _data_pdfs_tmp_20_35_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_35;
-         double * RESTRICT  _data_pdfs_tmp_20_36_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_36;
-         double * RESTRICT  _data_pdfs_tmp_20_37_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_37;
-         double * RESTRICT  _data_pdfs_tmp_20_38_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_38;
-         double * RESTRICT  _data_pdfs_tmp_20_39_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_39;
-         double * RESTRICT  _data_pdfs_tmp_20_310_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_310;
-         double * RESTRICT  _data_pdfs_tmp_20_311_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_311;
-         double * RESTRICT  _data_pdfs_tmp_20_312_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_312;
-         double * RESTRICT  _data_pdfs_tmp_20_313_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_313;
-         double * RESTRICT  _data_pdfs_tmp_20_314_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_314;
-         double * RESTRICT  _data_pdfs_tmp_20_315_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_315;
-         double * RESTRICT  _data_pdfs_tmp_20_316_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_316;
-         double * RESTRICT  _data_pdfs_tmp_20_317_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_317;
-         double * RESTRICT  _data_pdfs_tmp_20_318_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_318;
          for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1)
          {
-            const double streamed_0 = _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0];
-            const double streamed_1 = _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0];
-            const double streamed_2 = _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0];
-            const double streamed_3 = _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            const double streamed_4 = _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double streamed_5 = _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0];
-            const double streamed_6 = _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0];
-            const double streamed_7 = _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            const double streamed_8 = _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double streamed_9 = _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            const double streamed_10 = _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double streamed_11 = _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0];
-            const double streamed_12 = _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0];
-            const double streamed_13 = _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            const double streamed_14 = _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double streamed_15 = _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0];
-            const double streamed_16 = _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0];
-            const double streamed_17 = _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            const double streamed_18 = _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            _data_pdfs_tmp_20_30_10[_stride_pdfs_tmp_0*ctr_0] = streamed_0;
-            _data_pdfs_tmp_20_31_10[_stride_pdfs_tmp_0*ctr_0] = streamed_1;
-            _data_pdfs_tmp_20_32_10[_stride_pdfs_tmp_0*ctr_0] = streamed_2;
-            _data_pdfs_tmp_20_33_10[_stride_pdfs_tmp_0*ctr_0] = streamed_3;
-            _data_pdfs_tmp_20_34_10[_stride_pdfs_tmp_0*ctr_0] = streamed_4;
-            _data_pdfs_tmp_20_35_10[_stride_pdfs_tmp_0*ctr_0] = streamed_5;
-            _data_pdfs_tmp_20_36_10[_stride_pdfs_tmp_0*ctr_0] = streamed_6;
-            _data_pdfs_tmp_20_37_10[_stride_pdfs_tmp_0*ctr_0] = streamed_7;
-            _data_pdfs_tmp_20_38_10[_stride_pdfs_tmp_0*ctr_0] = streamed_8;
-            _data_pdfs_tmp_20_39_10[_stride_pdfs_tmp_0*ctr_0] = streamed_9;
-            _data_pdfs_tmp_20_310_10[_stride_pdfs_tmp_0*ctr_0] = streamed_10;
-            _data_pdfs_tmp_20_311_10[_stride_pdfs_tmp_0*ctr_0] = streamed_11;
-            _data_pdfs_tmp_20_312_10[_stride_pdfs_tmp_0*ctr_0] = streamed_12;
-            _data_pdfs_tmp_20_313_10[_stride_pdfs_tmp_0*ctr_0] = streamed_13;
-            _data_pdfs_tmp_20_314_10[_stride_pdfs_tmp_0*ctr_0] = streamed_14;
-            _data_pdfs_tmp_20_315_10[_stride_pdfs_tmp_0*ctr_0] = streamed_15;
-            _data_pdfs_tmp_20_316_10[_stride_pdfs_tmp_0*ctr_0] = streamed_16;
-            _data_pdfs_tmp_20_317_10[_stride_pdfs_tmp_0*ctr_0] = streamed_17;
-            _data_pdfs_tmp_20_318_10[_stride_pdfs_tmp_0*ctr_0] = streamed_18;
+            const double streamed_0 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2];
+            const double streamed_1 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3];
+            const double streamed_2 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3];
+            const double streamed_3 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3];
+            const double streamed_4 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3];
+            const double streamed_5 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3];
+            const double streamed_6 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3];
+            const double streamed_7 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3];
+            const double streamed_8 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3];
+            const double streamed_9 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3];
+            const double streamed_10 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3];
+            const double streamed_11 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3];
+            const double streamed_12 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3];
+            const double streamed_13 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3];
+            const double streamed_14 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3];
+            const double streamed_15 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3];
+            const double streamed_16 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3];
+            const double streamed_17 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3];
+            const double streamed_18 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2] = streamed_0;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3] = streamed_1;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3] = streamed_2;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3] = streamed_3;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3] = streamed_4;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3] = streamed_5;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3] = streamed_6;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3] = streamed_7;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3] = streamed_8;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3] = streamed_9;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3] = streamed_10;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3] = streamed_11;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3] = streamed_12;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3] = streamed_13;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3] = streamed_14;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3] = streamed_15;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3] = streamed_16;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3] = streamed_17;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3] = streamed_18;
          }
       }
    }
@@ -402,124 +212,48 @@ static FUNC_PREFIX void d3q19srt_kernel_streamOnlyNoAdvancement(double * RESTRIC
 {
    for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
-      double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_35 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_311 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_312 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_313 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_314 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_tmp_20_30 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2;
-      double * RESTRICT  _data_pdfs_tmp_20_31 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_32 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_33 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_34 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_35 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_36 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_37 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_38 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_39 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_310 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_311 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_312 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_313 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_314 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_315 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_316 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_317 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_318 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
-         double * RESTRICT _data_pdfs_20_31_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_31;
-         double * RESTRICT _data_pdfs_20_32_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_32;
-         double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
-         double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
-         double * RESTRICT _data_pdfs_2m1_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_35;
-         double * RESTRICT _data_pdfs_21_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_36;
-         double * RESTRICT _data_pdfs_20_37_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_37;
-         double * RESTRICT _data_pdfs_20_38_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_38;
-         double * RESTRICT _data_pdfs_20_39_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_39;
-         double * RESTRICT _data_pdfs_20_310_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_310;
-         double * RESTRICT _data_pdfs_2m1_311_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_311;
-         double * RESTRICT _data_pdfs_2m1_312_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_312;
-         double * RESTRICT _data_pdfs_2m1_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_313;
-         double * RESTRICT _data_pdfs_2m1_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_314;
-         double * RESTRICT _data_pdfs_21_315_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_315;
-         double * RESTRICT _data_pdfs_21_316_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_316;
-         double * RESTRICT _data_pdfs_21_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_317;
-         double * RESTRICT _data_pdfs_21_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_318;
-         double * RESTRICT  _data_pdfs_tmp_20_30_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_30;
-         double * RESTRICT  _data_pdfs_tmp_20_31_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_31;
-         double * RESTRICT  _data_pdfs_tmp_20_32_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_32;
-         double * RESTRICT  _data_pdfs_tmp_20_33_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_33;
-         double * RESTRICT  _data_pdfs_tmp_20_34_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_34;
-         double * RESTRICT  _data_pdfs_tmp_20_35_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_35;
-         double * RESTRICT  _data_pdfs_tmp_20_36_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_36;
-         double * RESTRICT  _data_pdfs_tmp_20_37_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_37;
-         double * RESTRICT  _data_pdfs_tmp_20_38_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_38;
-         double * RESTRICT  _data_pdfs_tmp_20_39_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_39;
-         double * RESTRICT  _data_pdfs_tmp_20_310_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_310;
-         double * RESTRICT  _data_pdfs_tmp_20_311_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_311;
-         double * RESTRICT  _data_pdfs_tmp_20_312_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_312;
-         double * RESTRICT  _data_pdfs_tmp_20_313_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_313;
-         double * RESTRICT  _data_pdfs_tmp_20_314_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_314;
-         double * RESTRICT  _data_pdfs_tmp_20_315_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_315;
-         double * RESTRICT  _data_pdfs_tmp_20_316_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_316;
-         double * RESTRICT  _data_pdfs_tmp_20_317_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_317;
-         double * RESTRICT  _data_pdfs_tmp_20_318_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_318;
          for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1)
          {
-            const double streamed_0 = _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0];
-            const double streamed_1 = _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0];
-            const double streamed_2 = _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0];
-            const double streamed_3 = _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            const double streamed_4 = _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double streamed_5 = _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0];
-            const double streamed_6 = _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0];
-            const double streamed_7 = _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            const double streamed_8 = _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double streamed_9 = _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            const double streamed_10 = _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double streamed_11 = _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0];
-            const double streamed_12 = _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0];
-            const double streamed_13 = _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            const double streamed_14 = _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double streamed_15 = _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0];
-            const double streamed_16 = _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0];
-            const double streamed_17 = _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            const double streamed_18 = _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            _data_pdfs_tmp_20_30_10[_stride_pdfs_tmp_0*ctr_0] = streamed_0;
-            _data_pdfs_tmp_20_31_10[_stride_pdfs_tmp_0*ctr_0] = streamed_1;
-            _data_pdfs_tmp_20_32_10[_stride_pdfs_tmp_0*ctr_0] = streamed_2;
-            _data_pdfs_tmp_20_33_10[_stride_pdfs_tmp_0*ctr_0] = streamed_3;
-            _data_pdfs_tmp_20_34_10[_stride_pdfs_tmp_0*ctr_0] = streamed_4;
-            _data_pdfs_tmp_20_35_10[_stride_pdfs_tmp_0*ctr_0] = streamed_5;
-            _data_pdfs_tmp_20_36_10[_stride_pdfs_tmp_0*ctr_0] = streamed_6;
-            _data_pdfs_tmp_20_37_10[_stride_pdfs_tmp_0*ctr_0] = streamed_7;
-            _data_pdfs_tmp_20_38_10[_stride_pdfs_tmp_0*ctr_0] = streamed_8;
-            _data_pdfs_tmp_20_39_10[_stride_pdfs_tmp_0*ctr_0] = streamed_9;
-            _data_pdfs_tmp_20_310_10[_stride_pdfs_tmp_0*ctr_0] = streamed_10;
-            _data_pdfs_tmp_20_311_10[_stride_pdfs_tmp_0*ctr_0] = streamed_11;
-            _data_pdfs_tmp_20_312_10[_stride_pdfs_tmp_0*ctr_0] = streamed_12;
-            _data_pdfs_tmp_20_313_10[_stride_pdfs_tmp_0*ctr_0] = streamed_13;
-            _data_pdfs_tmp_20_314_10[_stride_pdfs_tmp_0*ctr_0] = streamed_14;
-            _data_pdfs_tmp_20_315_10[_stride_pdfs_tmp_0*ctr_0] = streamed_15;
-            _data_pdfs_tmp_20_316_10[_stride_pdfs_tmp_0*ctr_0] = streamed_16;
-            _data_pdfs_tmp_20_317_10[_stride_pdfs_tmp_0*ctr_0] = streamed_17;
-            _data_pdfs_tmp_20_318_10[_stride_pdfs_tmp_0*ctr_0] = streamed_18;
+            const double streamed_0 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2];
+            const double streamed_1 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3];
+            const double streamed_2 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3];
+            const double streamed_3 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3];
+            const double streamed_4 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3];
+            const double streamed_5 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3];
+            const double streamed_6 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3];
+            const double streamed_7 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3];
+            const double streamed_8 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3];
+            const double streamed_9 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3];
+            const double streamed_10 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3];
+            const double streamed_11 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3];
+            const double streamed_12 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3];
+            const double streamed_13 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3];
+            const double streamed_14 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3];
+            const double streamed_15 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3];
+            const double streamed_16 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3];
+            const double streamed_17 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3];
+            const double streamed_18 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2] = streamed_0;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3] = streamed_1;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3] = streamed_2;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3] = streamed_3;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3] = streamed_4;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3] = streamed_5;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3] = streamed_6;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3] = streamed_7;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3] = streamed_8;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3] = streamed_9;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3] = streamed_10;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3] = streamed_11;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3] = streamed_12;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3] = streamed_13;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3] = streamed_14;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3] = streamed_15;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3] = streamed_16;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3] = streamed_17;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3] = streamed_18;
          }
       }
    }
@@ -532,80 +266,34 @@ static FUNC_PREFIX void d3q19srt_kernel_initialise(double * RESTRICT const _data
 {
    for (int64_t ctr_2 = 0; ctr_2 < _size_density_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_density_20_30 = _data_density + _stride_density_2*ctr_2;
-      double * RESTRICT _data_velocity_20_30 = _data_velocity + _stride_velocity_2*ctr_2;
-      double * RESTRICT _data_velocity_20_31 = _data_velocity + _stride_velocity_2*ctr_2 + _stride_velocity_3;
-      double * RESTRICT _data_velocity_20_32 = _data_velocity + _stride_velocity_2*ctr_2 + 2*_stride_velocity_3;
-      double * RESTRICT  _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
-      double * RESTRICT  _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_density_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_density_20_30_10 = _stride_density_1*ctr_1 + _data_density_20_30;
-         double * RESTRICT _data_velocity_20_30_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_30;
-         double * RESTRICT _data_velocity_20_31_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_31;
-         double * RESTRICT _data_velocity_20_32_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_32;
-         double * RESTRICT  _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
-         double * RESTRICT  _data_pdfs_20_31_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_31;
-         double * RESTRICT  _data_pdfs_20_32_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_32;
-         double * RESTRICT  _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
-         double * RESTRICT  _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
-         double * RESTRICT  _data_pdfs_20_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_35;
-         double * RESTRICT  _data_pdfs_20_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_36;
-         double * RESTRICT  _data_pdfs_20_37_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_37;
-         double * RESTRICT  _data_pdfs_20_38_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_38;
-         double * RESTRICT  _data_pdfs_20_39_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_39;
-         double * RESTRICT  _data_pdfs_20_310_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_310;
-         double * RESTRICT  _data_pdfs_20_311_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_311;
-         double * RESTRICT  _data_pdfs_20_312_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_312;
-         double * RESTRICT  _data_pdfs_20_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_313;
-         double * RESTRICT  _data_pdfs_20_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_314;
-         double * RESTRICT  _data_pdfs_20_315_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_315;
-         double * RESTRICT  _data_pdfs_20_316_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_316;
-         double * RESTRICT  _data_pdfs_20_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_317;
-         double * RESTRICT  _data_pdfs_20_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_318;
          for (int64_t ctr_0 = 0; ctr_0 < _size_density_0; ctr_0 += 1)
          {
-            const double rho = _data_density_20_30_10[_stride_density_0*ctr_0];
+            const double rho = _data_density[_stride_density_0*ctr_0 + _stride_density_1*ctr_1 + _stride_density_2*ctr_2];
             const double delta_rho = rho - 1.0;
-            const double u_0 = _data_velocity_20_30_10[_stride_velocity_0*ctr_0];
-            const double u_1 = _data_velocity_20_31_10[_stride_velocity_0*ctr_0];
-            const double u_2 = _data_velocity_20_32_10[_stride_velocity_0*ctr_0];
-            _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] = delta_rho*0.33333333333333331 - 0.33333333333333331*(u_0*u_0) - 0.33333333333333331*(u_1*u_1) - 0.33333333333333331*(u_2*u_2);
-            _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0] = delta_rho*0.055555555555555552 + u_1*0.16666666666666666 - 0.16666666666666666*(u_0*u_0) - 0.16666666666666666*(u_2*u_2) + 0.16666666666666666*(u_1*u_1);
-            _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] = delta_rho*0.055555555555555552 + u_1*-0.16666666666666666 - 0.16666666666666666*(u_0*u_0) - 0.16666666666666666*(u_2*u_2) + 0.16666666666666666*(u_1*u_1);
-            _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] = delta_rho*0.055555555555555552 + u_0*-0.16666666666666666 - 0.16666666666666666*(u_1*u_1) - 0.16666666666666666*(u_2*u_2) + 0.16666666666666666*(u_0*u_0);
-            _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0] = delta_rho*0.055555555555555552 + u_0*0.16666666666666666 - 0.16666666666666666*(u_1*u_1) - 0.16666666666666666*(u_2*u_2) + 0.16666666666666666*(u_0*u_0);
-            _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0] = delta_rho*0.055555555555555552 + u_2*0.16666666666666666 - 0.16666666666666666*(u_0*u_0) - 0.16666666666666666*(u_1*u_1) + 0.16666666666666666*(u_2*u_2);
-            _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] = delta_rho*0.055555555555555552 + u_2*-0.16666666666666666 - 0.16666666666666666*(u_0*u_0) - 0.16666666666666666*(u_1*u_1) + 0.16666666666666666*(u_2*u_2);
-            _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_0*u_1*-0.25 + u_0*-0.083333333333333329 + u_1*0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_1*u_1);
-            _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_0*u_1*0.25 + u_0*0.083333333333333329 + u_1*0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_1*u_1);
-            _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_0*u_1*0.25 + u_0*-0.083333333333333329 + u_1*-0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_1*u_1);
-            _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_0*u_1*-0.25 + u_0*0.083333333333333329 + u_1*-0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_1*u_1);
-            _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_1*u_2*0.25 + u_1*0.083333333333333329 + u_2*0.083333333333333329 + 0.083333333333333329*(u_1*u_1) + 0.083333333333333329*(u_2*u_2);
-            _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_1*u_2*-0.25 + u_1*-0.083333333333333329 + u_2*0.083333333333333329 + 0.083333333333333329*(u_1*u_1) + 0.083333333333333329*(u_2*u_2);
-            _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_0*u_2*-0.25 + u_0*-0.083333333333333329 + u_2*0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_2*u_2);
-            _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_0*u_2*0.25 + u_0*0.083333333333333329 + u_2*0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_2*u_2);
-            _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_1*u_2*-0.25 + u_1*0.083333333333333329 + u_2*-0.083333333333333329 + 0.083333333333333329*(u_1*u_1) + 0.083333333333333329*(u_2*u_2);
-            _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_1*u_2*0.25 + u_1*-0.083333333333333329 + u_2*-0.083333333333333329 + 0.083333333333333329*(u_1*u_1) + 0.083333333333333329*(u_2*u_2);
-            _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_0*u_2*0.25 + u_0*-0.083333333333333329 + u_2*-0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_2*u_2);
-            _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_0*u_2*-0.25 + u_0*0.083333333333333329 + u_2*-0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_2*u_2);
+            const double u_0 = _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2];
+            const double u_1 = _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2 + _stride_velocity_3];
+            const double u_2 = _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2 + 2*_stride_velocity_3];
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2] = delta_rho*0.33333333333333331 - 0.33333333333333331*(u_0*u_0) - 0.33333333333333331*(u_1*u_1) - 0.33333333333333331*(u_2*u_2);
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3] = delta_rho*0.055555555555555552 + u_1*0.16666666666666666 - 0.16666666666666666*(u_0*u_0) - 0.16666666666666666*(u_2*u_2) + 0.16666666666666666*(u_1*u_1);
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] = delta_rho*0.055555555555555552 + u_1*-0.16666666666666666 - 0.16666666666666666*(u_0*u_0) - 0.16666666666666666*(u_2*u_2) + 0.16666666666666666*(u_1*u_1);
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] = delta_rho*0.055555555555555552 + u_0*-0.16666666666666666 - 0.16666666666666666*(u_1*u_1) - 0.16666666666666666*(u_2*u_2) + 0.16666666666666666*(u_0*u_0);
+            _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3] = delta_rho*0.055555555555555552 + u_0*0.16666666666666666 - 0.16666666666666666*(u_1*u_1) - 0.16666666666666666*(u_2*u_2) + 0.16666666666666666*(u_0*u_0);
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3] = delta_rho*0.055555555555555552 + u_2*0.16666666666666666 - 0.16666666666666666*(u_0*u_0) - 0.16666666666666666*(u_1*u_1) + 0.16666666666666666*(u_2*u_2);
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3] = delta_rho*0.055555555555555552 + u_2*-0.16666666666666666 - 0.16666666666666666*(u_0*u_0) - 0.16666666666666666*(u_1*u_1) + 0.16666666666666666*(u_2*u_2);
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_0*u_1*-0.25 + u_0*-0.083333333333333329 + u_1*0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_1*u_1);
+            _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_0*u_1*0.25 + u_0*0.083333333333333329 + u_1*0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_1*u_1);
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_0*u_1*0.25 + u_0*-0.083333333333333329 + u_1*-0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_1*u_1);
+            _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_0*u_1*-0.25 + u_0*0.083333333333333329 + u_1*-0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_1*u_1);
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_1*u_2*0.25 + u_1*0.083333333333333329 + u_2*0.083333333333333329 + 0.083333333333333329*(u_1*u_1) + 0.083333333333333329*(u_2*u_2);
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_1*u_2*-0.25 + u_1*-0.083333333333333329 + u_2*0.083333333333333329 + 0.083333333333333329*(u_1*u_1) + 0.083333333333333329*(u_2*u_2);
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_0*u_2*-0.25 + u_0*-0.083333333333333329 + u_2*0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_2*u_2);
+            _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_0*u_2*0.25 + u_0*0.083333333333333329 + u_2*0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_2*u_2);
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_1*u_2*-0.25 + u_1*0.083333333333333329 + u_2*-0.083333333333333329 + 0.083333333333333329*(u_1*u_1) + 0.083333333333333329*(u_2*u_2);
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_1*u_2*0.25 + u_1*-0.083333333333333329 + u_2*-0.083333333333333329 + 0.083333333333333329*(u_1*u_1) + 0.083333333333333329*(u_2*u_2);
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_0*u_2*0.25 + u_0*-0.083333333333333329 + u_2*-0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_2*u_2);
+            _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_0*u_2*-0.25 + u_0*0.083333333333333329 + u_2*-0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_2*u_2);
          }
       }
    }
@@ -618,71 +306,25 @@ static FUNC_PREFIX void d3q19srt_kernel_getter(double * RESTRICT  _data_density,
 {
    for (int64_t ctr_2 = 0; ctr_2 < _size_density_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
-      double * RESTRICT _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3;
-      double * RESTRICT  _data_density_20_30 = _data_density + _stride_density_2*ctr_2;
-      double * RESTRICT  _data_velocity_20_30 = _data_velocity + _stride_velocity_2*ctr_2;
-      double * RESTRICT  _data_velocity_20_31 = _data_velocity + _stride_velocity_2*ctr_2 + _stride_velocity_3;
-      double * RESTRICT  _data_velocity_20_32 = _data_velocity + _stride_velocity_2*ctr_2 + 2*_stride_velocity_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_density_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_20_310_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_310;
-         double * RESTRICT _data_pdfs_20_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_314;
-         double * RESTRICT _data_pdfs_20_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_318;
-         double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
-         double * RESTRICT _data_pdfs_20_38_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_38;
-         double * RESTRICT _data_pdfs_20_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_313;
-         double * RESTRICT _data_pdfs_20_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_317;
-         double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
-         double * RESTRICT _data_pdfs_20_37_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_37;
-         double * RESTRICT _data_pdfs_20_39_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_39;
-         double * RESTRICT _data_pdfs_20_31_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_31;
-         double * RESTRICT _data_pdfs_20_311_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_311;
-         double * RESTRICT _data_pdfs_20_315_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_315;
-         double * RESTRICT _data_pdfs_20_312_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_312;
-         double * RESTRICT _data_pdfs_20_316_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_316;
-         double * RESTRICT _data_pdfs_20_32_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_32;
-         double * RESTRICT _data_pdfs_20_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_35;
-         double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
-         double * RESTRICT _data_pdfs_20_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_36;
-         double * RESTRICT  _data_density_20_30_10 = _stride_density_1*ctr_1 + _data_density_20_30;
-         double * RESTRICT  _data_velocity_20_30_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_30;
-         double * RESTRICT  _data_velocity_20_31_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_31;
-         double * RESTRICT  _data_velocity_20_32_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_32;
          for (int64_t ctr_0 = 0; ctr_0 < _size_density_0; ctr_0 += 1)
          {
-            const double vel0Term = _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0];
-            const double momdensity_0 = vel0Term - 1.0*_data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_37_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_39_10[_stride_pdfs_0*ctr_0];
-            const double vel1Term = _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0];
-            const double momdensity_1 = vel1Term - 1.0*_data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_39_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0];
-            const double vel2Term = _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0];
-            const double delta_rho = vel0Term + vel1Term + vel2Term + _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0];
-            const double momdensity_2 = vel2Term - 1.0*_data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0];
+            const double vel0Term = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3];
+            const double momdensity_0 = vel0Term - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3];
+            const double vel1Term = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3];
+            const double momdensity_1 = vel1Term - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3];
+            const double vel2Term = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3];
+            const double delta_rho = vel0Term + vel1Term + vel2Term + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2];
+            const double momdensity_2 = vel2Term + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3];
             const double rho = delta_rho + 1.0;
             const double u_0 = momdensity_0;
             const double u_1 = momdensity_1;
             const double u_2 = momdensity_2;
-            _data_density_20_30_10[_stride_density_0*ctr_0] = rho;
-            _data_velocity_20_30_10[_stride_velocity_0*ctr_0] = u_0;
-            _data_velocity_20_31_10[_stride_velocity_0*ctr_0] = u_1;
-            _data_velocity_20_32_10[_stride_velocity_0*ctr_0] = u_2;
+            _data_density[_stride_density_0*ctr_0 + _stride_density_1*ctr_1 + _stride_density_2*ctr_2] = rho;
+            _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2] = u_0;
+            _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2 + _stride_velocity_3] = u_1;
+            _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2 + 2*_stride_velocity_3] = u_2;
          }
       }
    }
diff --git a/src/lbm_generated/sweep_collection/D3Q19SRT.h b/src/lbm_generated/sweep_collection/D3Q19SRT.h
index 2fdb3850cb000daf544b265fa4ae3808253ddc00..56475b66e1a715cb8416cbb7b28711012254ae45 100644
--- a/src/lbm_generated/sweep_collection/D3Q19SRT.h
+++ b/src/lbm_generated/sweep_collection/D3Q19SRT.h
@@ -59,20 +59,20 @@ namespace lbm {
 
 class D3Q19SRT
 {
-public:
-  enum Type { ALL = 0, INNER = 1, OUTER = 2 };
+ public:
+   enum Type { ALL = 0, INNER = 1, OUTER = 2 };
 
    D3Q19SRT(const shared_ptr< StructuredBlockStorage > & blocks, BlockDataID pdfsID_, BlockDataID densityID_, BlockDataID velocityID_, double omega, const Cell & outerWidth=Cell(1, 1, 1))
-     : blocks_(blocks), pdfsID(pdfsID_), densityID(densityID_), velocityID(velocityID_), omega_(omega), outerWidth_(outerWidth)
+      : blocks_(blocks), pdfsID(pdfsID_), densityID(densityID_), velocityID(velocityID_), omega_(omega), outerWidth_(outerWidth)
    {
       
 
+      validInnerOuterSplit_= true;
+
       for (auto& iBlock : *blocks)
       {
-         if (int_c(blocks->getNumberOfXCells(iBlock)) <= outerWidth_[0] * 2 ||
-             int_c(blocks->getNumberOfYCells(iBlock)) <= outerWidth_[1] * 2 ||
-             int_c(blocks->getNumberOfZCells(iBlock)) <= outerWidth_[2] * 2)
-          WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller or increase cellsPerBlock")
+         if (int_c(blocks->getNumberOfXCells(iBlock)) <= outerWidth_[0] * 2 || int_c(blocks->getNumberOfYCells(iBlock)) <= outerWidth_[1] * 2 || int_c(blocks->getNumberOfZCells(iBlock)) <= outerWidth_[2] * 2)
+            validInnerOuterSplit_ = false;
       }
    };
 
@@ -117,27 +117,33 @@ public:
 
    std::function<void (IBlock *)> streamCollide(Type type)
    {
+      if (!validInnerOuterSplit_ && type != Type::ALL)
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
       switch (type)
       {
-         case Type::INNER:
-            return [this](IBlock* block) { streamCollideInner(block); };
-         case Type::OUTER:
-            return [this](IBlock* block) { streamCollideOuter(block); };
-         default:
-            return [this](IBlock* block) { streamCollide(block); };
+      case Type::INNER:
+         return [this](IBlock* block) { streamCollideInner(block); };
+      case Type::OUTER:
+         return [this](IBlock* block) { streamCollideOuter(block); };
+      default:
+         return [this](IBlock* block) { streamCollide(block); };
       }
    }
 
    std::function<void (IBlock *)> streamCollide(Type type, const cell_idx_t ghost_layers)
    {
+      if (!validInnerOuterSplit_ && type != Type::ALL)
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
       switch (type)
       {
-         case Type::INNER:
-            return [this](IBlock* block) { streamCollideInner(block); };
-         case Type::OUTER:
-            return [this](IBlock* block) { streamCollideOuter(block); };
-         default:
-            return [this, ghost_layers](IBlock* block) { streamCollide(block, ghost_layers); };
+      case Type::INNER:
+         return [this](IBlock* block) { streamCollideInner(block); };
+      case Type::OUTER:
+         return [this](IBlock* block) { streamCollideOuter(block); };
+      default:
+         return [this, ghost_layers](IBlock* block) { streamCollide(block, ghost_layers); };
       }
    }
 
@@ -298,14 +304,14 @@ public:
          layers_.push_back(ci);
       }
 
-    
+      
       for( auto & ci: layers_ )
       {
          streamCollideCellInterval(pdfs, pdfs_tmp, omega, ci);
       }
-    
+      
 
-    pdfs->swapDataPointers(pdfs_tmp);
+      pdfs->swapDataPointers(pdfs_tmp);
 
    }
    
@@ -317,27 +323,33 @@ public:
 
    std::function<void (IBlock *)> collide(Type type)
    {
+      if (!validInnerOuterSplit_ && type != Type::ALL)
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
       switch (type)
       {
-         case Type::INNER:
-            return [this](IBlock* block) { collideInner(block); };
-         case Type::OUTER:
-            return [this](IBlock* block) { collideOuter(block); };
-         default:
-            return [this](IBlock* block) { collide(block); };
+      case Type::INNER:
+         return [this](IBlock* block) { collideInner(block); };
+      case Type::OUTER:
+         return [this](IBlock* block) { collideOuter(block); };
+      default:
+         return [this](IBlock* block) { collide(block); };
       }
    }
 
    std::function<void (IBlock *)> collide(Type type, const cell_idx_t ghost_layers)
    {
+      if (!validInnerOuterSplit_ && type != Type::ALL)
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
       switch (type)
       {
-         case Type::INNER:
-            return [this](IBlock* block) { collideInner(block); };
-         case Type::OUTER:
-            return [this](IBlock* block) { collideOuter(block); };
-         default:
-            return [this, ghost_layers](IBlock* block) { collide(block, ghost_layers); };
+      case Type::INNER:
+         return [this](IBlock* block) { collideInner(block); };
+      case Type::OUTER:
+         return [this](IBlock* block) { collideOuter(block); };
+      default:
+         return [this, ghost_layers](IBlock* block) { collide(block, ghost_layers); };
       }
    }
 
@@ -425,14 +437,14 @@ public:
          layers_.push_back(ci);
       }
 
-    
+      
       for( auto & ci: layers_ )
       {
          collideCellInterval(pdfs, omega, ci);
       }
-    
+      
 
-    
+      
    }
    
 
@@ -443,27 +455,33 @@ public:
 
    std::function<void (IBlock *)> stream(Type type)
    {
+      if (!validInnerOuterSplit_ && type != Type::ALL)
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
       switch (type)
       {
-         case Type::INNER:
-            return [this](IBlock* block) { streamInner(block); };
-         case Type::OUTER:
-            return [this](IBlock* block) { streamOuter(block); };
-         default:
-            return [this](IBlock* block) { stream(block); };
+      case Type::INNER:
+         return [this](IBlock* block) { streamInner(block); };
+      case Type::OUTER:
+         return [this](IBlock* block) { streamOuter(block); };
+      default:
+         return [this](IBlock* block) { stream(block); };
       }
    }
 
    std::function<void (IBlock *)> stream(Type type, const cell_idx_t ghost_layers)
    {
+      if (!validInnerOuterSplit_ && type != Type::ALL)
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
       switch (type)
       {
-         case Type::INNER:
-            return [this](IBlock* block) { streamInner(block); };
-         case Type::OUTER:
-            return [this](IBlock* block) { streamOuter(block); };
-         default:
-            return [this, ghost_layers](IBlock* block) { stream(block, ghost_layers); };
+      case Type::INNER:
+         return [this](IBlock* block) { streamInner(block); };
+      case Type::OUTER:
+         return [this](IBlock* block) { streamOuter(block); };
+      default:
+         return [this, ghost_layers](IBlock* block) { stream(block, ghost_layers); };
       }
    }
 
@@ -624,14 +642,14 @@ public:
          layers_.push_back(ci);
       }
 
-    
+      
       for( auto & ci: layers_ )
       {
          streamCellInterval(pdfs, pdfs_tmp, ci);
       }
-    
+      
 
-    pdfs->swapDataPointers(pdfs_tmp);
+      pdfs->swapDataPointers(pdfs_tmp);
 
    }
    
@@ -643,27 +661,33 @@ public:
 
    std::function<void (IBlock *)> streamOnlyNoAdvancement(Type type)
    {
+      if (!validInnerOuterSplit_ && type != Type::ALL)
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
       switch (type)
       {
-         case Type::INNER:
-            return [this](IBlock* block) { streamOnlyNoAdvancementInner(block); };
-         case Type::OUTER:
-            return [this](IBlock* block) { streamOnlyNoAdvancementOuter(block); };
-         default:
-            return [this](IBlock* block) { streamOnlyNoAdvancement(block); };
+      case Type::INNER:
+         return [this](IBlock* block) { streamOnlyNoAdvancementInner(block); };
+      case Type::OUTER:
+         return [this](IBlock* block) { streamOnlyNoAdvancementOuter(block); };
+      default:
+         return [this](IBlock* block) { streamOnlyNoAdvancement(block); };
       }
    }
 
    std::function<void (IBlock *)> streamOnlyNoAdvancement(Type type, const cell_idx_t ghost_layers)
    {
+      if (!validInnerOuterSplit_ && type != Type::ALL)
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
       switch (type)
       {
-         case Type::INNER:
-            return [this](IBlock* block) { streamOnlyNoAdvancementInner(block); };
-         case Type::OUTER:
-            return [this](IBlock* block) { streamOnlyNoAdvancementOuter(block); };
-         default:
-            return [this, ghost_layers](IBlock* block) { streamOnlyNoAdvancement(block, ghost_layers); };
+      case Type::INNER:
+         return [this](IBlock* block) { streamOnlyNoAdvancementInner(block); };
+      case Type::OUTER:
+         return [this](IBlock* block) { streamOnlyNoAdvancementOuter(block); };
+      default:
+         return [this, ghost_layers](IBlock* block) { streamOnlyNoAdvancement(block, ghost_layers); };
       }
    }
 
@@ -821,14 +845,14 @@ public:
          layers_.push_back(ci);
       }
 
-    
+      
       for( auto & ci: layers_ )
       {
          streamOnlyNoAdvancementCellInterval(pdfs, pdfs_tmp, ci);
       }
-    
+      
 
-    
+      
    }
    
 
@@ -839,27 +863,33 @@ public:
 
    std::function<void (IBlock *)> initialise(Type type)
    {
+      if (!validInnerOuterSplit_ && type != Type::ALL)
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
       switch (type)
       {
-         case Type::INNER:
-            return [this](IBlock* block) { initialiseInner(block); };
-         case Type::OUTER:
-            return [this](IBlock* block) { initialiseOuter(block); };
-         default:
-            return [this](IBlock* block) { initialise(block); };
+      case Type::INNER:
+         return [this](IBlock* block) { initialiseInner(block); };
+      case Type::OUTER:
+         return [this](IBlock* block) { initialiseOuter(block); };
+      default:
+         return [this](IBlock* block) { initialise(block); };
       }
    }
 
    std::function<void (IBlock *)> initialise(Type type, const cell_idx_t ghost_layers)
    {
+      if (!validInnerOuterSplit_ && type != Type::ALL)
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
       switch (type)
       {
-         case Type::INNER:
-            return [this](IBlock* block) { initialiseInner(block); };
-         case Type::OUTER:
-            return [this](IBlock* block) { initialiseOuter(block); };
-         default:
-            return [this, ghost_layers](IBlock* block) { initialise(block, ghost_layers); };
+      case Type::INNER:
+         return [this](IBlock* block) { initialiseInner(block); };
+      case Type::OUTER:
+         return [this](IBlock* block) { initialiseOuter(block); };
+      default:
+         return [this, ghost_layers](IBlock* block) { initialise(block, ghost_layers); };
       }
    }
 
@@ -870,9 +900,9 @@ public:
       const cell_idx_t ghost_layers = 0;
       
 
-      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
-      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
       auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
+      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
 
       
       
@@ -884,9 +914,9 @@ public:
    {
       
 
-      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
-      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
       auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
+      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
 
       
       
@@ -898,9 +928,9 @@ public:
 
    void initialiseCellInterval(IBlock * block, const CellInterval & ci)
    {
-      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
-      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
       auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
+      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
 
       
       
@@ -910,9 +940,9 @@ public:
 
    void initialiseInner(IBlock * block)
    {
-      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
-      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
       auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
+      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
 
       
       
@@ -926,9 +956,9 @@ public:
    void initialiseOuter(IBlock * block)
    {
 
-      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
-      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
       auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
+      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
 
       
       
@@ -957,14 +987,14 @@ public:
          layers_.push_back(ci);
       }
 
-    
+      
       for( auto & ci: layers_ )
       {
          initialiseCellInterval(density, pdfs, velocity, ci);
       }
-    
+      
 
-    
+      
    }
    
 
@@ -975,27 +1005,33 @@ public:
 
    std::function<void (IBlock *)> calculateMacroscopicParameters(Type type)
    {
+      if (!validInnerOuterSplit_ && type != Type::ALL)
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
       switch (type)
       {
-         case Type::INNER:
-            return [this](IBlock* block) { calculateMacroscopicParametersInner(block); };
-         case Type::OUTER:
-            return [this](IBlock* block) { calculateMacroscopicParametersOuter(block); };
-         default:
-            return [this](IBlock* block) { calculateMacroscopicParameters(block); };
+      case Type::INNER:
+         return [this](IBlock* block) { calculateMacroscopicParametersInner(block); };
+      case Type::OUTER:
+         return [this](IBlock* block) { calculateMacroscopicParametersOuter(block); };
+      default:
+         return [this](IBlock* block) { calculateMacroscopicParameters(block); };
       }
    }
 
    std::function<void (IBlock *)> calculateMacroscopicParameters(Type type, const cell_idx_t ghost_layers)
    {
+      if (!validInnerOuterSplit_ && type != Type::ALL)
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
       switch (type)
       {
-         case Type::INNER:
-            return [this](IBlock* block) { calculateMacroscopicParametersInner(block); };
-         case Type::OUTER:
-            return [this](IBlock* block) { calculateMacroscopicParametersOuter(block); };
-         default:
-            return [this, ghost_layers](IBlock* block) { calculateMacroscopicParameters(block, ghost_layers); };
+      case Type::INNER:
+         return [this](IBlock* block) { calculateMacroscopicParametersInner(block); };
+      case Type::OUTER:
+         return [this](IBlock* block) { calculateMacroscopicParametersOuter(block); };
+      default:
+         return [this, ghost_layers](IBlock* block) { calculateMacroscopicParameters(block, ghost_layers); };
       }
    }
 
@@ -1006,9 +1042,9 @@ public:
       const cell_idx_t ghost_layers = 0;
       
 
-      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
-      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
       auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
+      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
 
       
       
@@ -1020,9 +1056,9 @@ public:
    {
       
 
-      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
-      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
       auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
+      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
 
       
       
@@ -1034,9 +1070,9 @@ public:
 
    void calculateMacroscopicParametersCellInterval(IBlock * block, const CellInterval & ci)
    {
-      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
-      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
       auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
+      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
 
       
       
@@ -1046,9 +1082,9 @@ public:
 
    void calculateMacroscopicParametersInner(IBlock * block)
    {
-      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
-      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
       auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
+      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
 
       
       
@@ -1062,9 +1098,9 @@ public:
    void calculateMacroscopicParametersOuter(IBlock * block)
    {
 
-      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
-      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
       auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
+      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
 
       
       
@@ -1093,32 +1129,33 @@ public:
          layers_.push_back(ci);
       }
 
-    
+      
       for( auto & ci: layers_ )
       {
          calculateMacroscopicParametersCellInterval(density, pdfs, velocity, ci);
       }
-    
+      
 
-    
+      
    }
    
 
    
 
-   private:
-      shared_ptr< StructuredBlockStorage > blocks_;
-      BlockDataID pdfsID;
+ private:
+   shared_ptr< StructuredBlockStorage > blocks_;
+   BlockDataID pdfsID;
     BlockDataID densityID;
     BlockDataID velocityID;
     double omega_;
 
     private: std::set< field::GhostLayerField<double, 19> *, field::SwapableCompare< field::GhostLayerField<double, 19> * > > cache_pdfs_;
 
-      Cell outerWidth_;
-      std::vector<CellInterval> layers_;
+   Cell outerWidth_;
+   std::vector<CellInterval> layers_;
+   bool validInnerOuterSplit_;
 
-      
+   
 };
 
 
diff --git a/src/lbm_generated/sweep_collection/D3Q27SRT.cpp b/src/lbm_generated/sweep_collection/D3Q27SRT.cpp
index ce89749fc60ab603f3172992cb46c65242e57d16..24a8f232c494d290ffb931d1dc9e33d093048b75 100644
--- a/src/lbm_generated/sweep_collection/D3Q27SRT.cpp
+++ b/src/lbm_generated/sweep_collection/D3Q27SRT.cpp
@@ -41,159 +41,51 @@ static FUNC_PREFIX void d3q27srt_kernel_streamCollide(double * RESTRICT const _d
 {
    for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_2m1_321 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_319 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_314 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_325 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_323 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_320 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_311 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_324 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_313 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_322 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_312 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_35 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
-      double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_326 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_tmp_20_30 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2;
-      double * RESTRICT  _data_pdfs_tmp_20_31 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_32 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_33 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_34 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_35 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_36 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_37 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_38 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_39 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_310 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_311 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_312 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_313 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_314 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_315 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_316 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_317 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_318 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_319 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 19*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_320 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 20*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_321 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 21*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_322 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 22*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_323 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 23*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_324 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 24*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_325 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 25*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_326 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 26*_stride_pdfs_tmp_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_2m1_321_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_321;
-         double * RESTRICT _data_pdfs_2m1_319_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_319;
-         double * RESTRICT _data_pdfs_2m1_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_314;
-         double * RESTRICT _data_pdfs_20_310_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_310;
-         double * RESTRICT _data_pdfs_20_38_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_38;
-         double * RESTRICT _data_pdfs_21_325_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_325;
-         double * RESTRICT _data_pdfs_21_323_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_323;
-         double * RESTRICT _data_pdfs_21_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_318;
-         double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
-         double * RESTRICT _data_pdfs_2m1_320_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_320;
-         double * RESTRICT _data_pdfs_2m1_311_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_311;
-         double * RESTRICT _data_pdfs_20_37_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_37;
-         double * RESTRICT _data_pdfs_20_31_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_31;
-         double * RESTRICT _data_pdfs_21_324_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_324;
-         double * RESTRICT _data_pdfs_21_315_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_315;
-         double * RESTRICT _data_pdfs_2m1_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_313;
-         double * RESTRICT _data_pdfs_2m1_322_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_322;
-         double * RESTRICT _data_pdfs_2m1_312_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_312;
-         double * RESTRICT _data_pdfs_2m1_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_35;
-         double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
-         double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
-         double * RESTRICT _data_pdfs_20_39_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_39;
-         double * RESTRICT _data_pdfs_20_32_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_32;
-         double * RESTRICT _data_pdfs_21_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_317;
-         double * RESTRICT _data_pdfs_21_326_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_326;
-         double * RESTRICT _data_pdfs_21_316_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_316;
-         double * RESTRICT _data_pdfs_21_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_36;
-         double * RESTRICT  _data_pdfs_tmp_20_30_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_30;
-         double * RESTRICT  _data_pdfs_tmp_20_31_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_31;
-         double * RESTRICT  _data_pdfs_tmp_20_32_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_32;
-         double * RESTRICT  _data_pdfs_tmp_20_33_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_33;
-         double * RESTRICT  _data_pdfs_tmp_20_34_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_34;
-         double * RESTRICT  _data_pdfs_tmp_20_35_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_35;
-         double * RESTRICT  _data_pdfs_tmp_20_36_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_36;
-         double * RESTRICT  _data_pdfs_tmp_20_37_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_37;
-         double * RESTRICT  _data_pdfs_tmp_20_38_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_38;
-         double * RESTRICT  _data_pdfs_tmp_20_39_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_39;
-         double * RESTRICT  _data_pdfs_tmp_20_310_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_310;
-         double * RESTRICT  _data_pdfs_tmp_20_311_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_311;
-         double * RESTRICT  _data_pdfs_tmp_20_312_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_312;
-         double * RESTRICT  _data_pdfs_tmp_20_313_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_313;
-         double * RESTRICT  _data_pdfs_tmp_20_314_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_314;
-         double * RESTRICT  _data_pdfs_tmp_20_315_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_315;
-         double * RESTRICT  _data_pdfs_tmp_20_316_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_316;
-         double * RESTRICT  _data_pdfs_tmp_20_317_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_317;
-         double * RESTRICT  _data_pdfs_tmp_20_318_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_318;
-         double * RESTRICT  _data_pdfs_tmp_20_319_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_319;
-         double * RESTRICT  _data_pdfs_tmp_20_320_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_320;
-         double * RESTRICT  _data_pdfs_tmp_20_321_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_321;
-         double * RESTRICT  _data_pdfs_tmp_20_322_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_322;
-         double * RESTRICT  _data_pdfs_tmp_20_323_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_323;
-         double * RESTRICT  _data_pdfs_tmp_20_324_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_324;
-         double * RESTRICT  _data_pdfs_tmp_20_325_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_325;
-         double * RESTRICT  _data_pdfs_tmp_20_326_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_326;
          for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1)
          {
-            const double vel0Term = _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_21_323_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_21_325_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_2m1_319_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_2m1_321_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double vel1Term = _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_21_324_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_320_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            const double vel2Term = _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_2m1_322_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0];
-            const double delta_rho = vel0Term + vel1Term + vel2Term + _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0] + _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] + _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_326_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0];
-            const double u_0 = vel0Term - 1.0*_data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_324_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_326_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_2m1_320_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_2m1_322_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            const double u_1 = vel1Term - 1.0*_data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_20_32_11[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_21_325_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_21_326_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_2m1_321_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_2m1_322_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_21_323_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_2m1_319_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double u_2 = vel2Term - 1.0*_data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_21_323_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_21_324_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_325_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_21_326_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_36_10[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_2m1_319_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_2m1_320_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_2m1_321_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double u0Mu1 = u_0 + u_1*-1.0;
+            const double vel0Term = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3];
+            const double vel1Term = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3];
+            const double vel2Term = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3];
+            const double delta_rho = vel0Term + vel1Term + vel2Term + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2];
+            const double u_0 = vel0Term - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3];
+            const double u_1 = vel1Term - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3];
+            const double u_2 = vel2Term - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3];
+            const double u0Mu1 = u_0 - u_1;
             const double u0Pu1 = u_0 + u_1;
             const double u1Pu2 = u_1 + u_2;
-            const double u1Mu2 = u_1 + u_2*-1.0;
-            const double u0Mu2 = u_0 + u_2*-1.0;
+            const double u1Mu2 = u_1 - u_2;
+            const double u0Mu2 = u_0 - u_2;
             const double u0Pu2 = u_0 + u_2;
             const double f_eq_common = delta_rho - 1.5*(u_0*u_0) - 1.5*(u_1*u_1) - 1.5*(u_2*u_2);
-            _data_pdfs_tmp_20_30_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.29629629629629628 - 1.0*_data_pdfs_20_30_10[_stride_pdfs_0*ctr_0]) + _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_31_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_1*0.22222222222222221 - 1.0*_data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_1*u_1)) + _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_32_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_1*-0.22222222222222221 - 1.0*_data_pdfs_20_32_11[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_1*u_1)) + _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_33_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_0*-0.22222222222222221 - 1.0*_data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0)) + _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            _data_pdfs_tmp_20_34_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_0*0.22222222222222221 - 1.0*_data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0)) + _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            _data_pdfs_tmp_20_35_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_2*0.22222222222222221 - 1.0*_data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_2*u_2)) + _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_36_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_2*-0.22222222222222221 - 1.0*_data_pdfs_21_36_10[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_2*u_2)) + _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_37_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Mu1*-0.055555555555555552 - 1.0*_data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.083333333333333329*(u0Mu1*u0Mu1)) + _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            _data_pdfs_tmp_20_38_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Pu1*0.055555555555555552 - 1.0*_data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.083333333333333329*(u0Pu1*u0Pu1)) + _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            _data_pdfs_tmp_20_39_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Pu1*-0.055555555555555552 - 1.0*_data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.083333333333333329*(u0Pu1*u0Pu1)) + _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            _data_pdfs_tmp_20_310_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Mu1*0.055555555555555552 - 1.0*_data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.083333333333333329*(u0Mu1*u0Mu1)) + _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            _data_pdfs_tmp_20_311_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u1Pu2*0.055555555555555552 - 1.0*_data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0] + 0.083333333333333329*(u1Pu2*u1Pu2)) + _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_312_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u1Mu2*-0.055555555555555552 - 1.0*_data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0] + 0.083333333333333329*(u1Mu2*u1Mu2)) + _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_313_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Mu2*-0.055555555555555552 - 1.0*_data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.083333333333333329*(u0Mu2*u0Mu2)) + _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            _data_pdfs_tmp_20_314_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Pu2*0.055555555555555552 - 1.0*_data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.083333333333333329*(u0Pu2*u0Pu2)) + _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            _data_pdfs_tmp_20_315_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u1Mu2*0.055555555555555552 - 1.0*_data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0] + 0.083333333333333329*(u1Mu2*u1Mu2)) + _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_316_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u1Pu2*-0.055555555555555552 - 1.0*_data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] + 0.083333333333333329*(u1Pu2*u1Pu2)) + _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_317_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Pu2*-0.055555555555555552 - 1.0*_data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.083333333333333329*(u0Pu2*u0Pu2)) + _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            _data_pdfs_tmp_20_318_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Mu2*0.055555555555555552 - 1.0*_data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.083333333333333329*(u0Mu2*u0Mu2)) + _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            _data_pdfs_tmp_20_319_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*0.013888888888888888 + u_2*0.013888888888888888 - 1.0*_data_pdfs_2m1_319_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + _data_pdfs_2m1_319_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            _data_pdfs_tmp_20_320_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*-0.013888888888888888 + u_2*0.013888888888888888 - 1.0*_data_pdfs_2m1_320_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + _data_pdfs_2m1_320_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            _data_pdfs_tmp_20_321_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*0.013888888888888888 + u_2*0.013888888888888888 - 1.0*_data_pdfs_2m1_321_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)) + _data_pdfs_2m1_321_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            _data_pdfs_tmp_20_322_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*-0.013888888888888888 + u_2*0.013888888888888888 - 1.0*_data_pdfs_2m1_322_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2)) + _data_pdfs_2m1_322_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            _data_pdfs_tmp_20_323_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*0.013888888888888888 + u_2*-0.013888888888888888 - 1.0*_data_pdfs_21_323_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2)) + _data_pdfs_21_323_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            _data_pdfs_tmp_20_324_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*-0.013888888888888888 + u_2*-0.013888888888888888 - 1.0*_data_pdfs_21_324_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)) + _data_pdfs_21_324_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            _data_pdfs_tmp_20_325_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*0.013888888888888888 + u_2*-0.013888888888888888 - 1.0*_data_pdfs_21_325_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + _data_pdfs_21_325_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            _data_pdfs_tmp_20_326_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*-0.013888888888888888 + u_2*-0.013888888888888888 - 1.0*_data_pdfs_21_326_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + _data_pdfs_21_326_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2] = omega*(f_eq_common*0.29629629629629628 - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3] = omega*(f_eq_common*0.07407407407407407 + u_1*0.22222222222222221 + 0.33333333333333331*(u_1*u_1) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.07407407407407407 + u_1*-0.22222222222222221 + 0.33333333333333331*(u_1*u_1) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.07407407407407407 + u_0*-0.22222222222222221 + 0.33333333333333331*(u_0*u_0) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.07407407407407407 + u_0*0.22222222222222221 + 0.33333333333333331*(u_0*u_0) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.07407407407407407 + u_2*0.22222222222222221 + 0.33333333333333331*(u_2*u_2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.07407407407407407 + u_2*-0.22222222222222221 + 0.33333333333333331*(u_2*u_2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u0Mu1*-0.055555555555555552 + 0.083333333333333329*(u0Mu1*u0Mu1) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u0Pu1*0.055555555555555552 + 0.083333333333333329*(u0Pu1*u0Pu1) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u0Pu1*-0.055555555555555552 + 0.083333333333333329*(u0Pu1*u0Pu1) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u0Mu1*0.055555555555555552 + 0.083333333333333329*(u0Mu1*u0Mu1) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u1Pu2*0.055555555555555552 + 0.083333333333333329*(u1Pu2*u1Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u1Mu2*-0.055555555555555552 + 0.083333333333333329*(u1Mu2*u1Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u0Mu2*-0.055555555555555552 + 0.083333333333333329*(u0Mu2*u0Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u0Pu2*0.055555555555555552 + 0.083333333333333329*(u0Pu2*u0Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u1Mu2*0.055555555555555552 + 0.083333333333333329*(u1Mu2*u1Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u1Pu2*-0.055555555555555552 + 0.083333333333333329*(u1Pu2*u1Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u0Pu2*-0.055555555555555552 + 0.083333333333333329*(u0Pu2*u0Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u0Mu2*0.055555555555555552 + 0.083333333333333329*(u0Mu2*u0Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 19*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*0.013888888888888888 + u_2*0.013888888888888888 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 20*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*-0.013888888888888888 + u_2*0.013888888888888888 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 21*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*0.013888888888888888 + u_2*0.013888888888888888 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 22*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*-0.013888888888888888 + u_2*0.013888888888888888 + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 23*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*0.013888888888888888 + u_2*-0.013888888888888888 + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 24*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*-0.013888888888888888 + u_2*-0.013888888888888888 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 25*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*0.013888888888888888 + u_2*-0.013888888888888888 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 26*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*-0.013888888888888888 + u_2*-0.013888888888888888 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3];
          }
       }
    }
@@ -206,132 +98,78 @@ static FUNC_PREFIX void d3q27srt_kernel_collide(double * RESTRICT  _data_pdfs, i
 {
    for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1)
    {
-      double * RESTRICT  _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_326 = _data_pdfs + _stride_pdfs_2*ctr_2 + 26*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_323 = _data_pdfs + _stride_pdfs_2*ctr_2 + 23*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_321 = _data_pdfs + _stride_pdfs_2*ctr_2 + 21*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_324 = _data_pdfs + _stride_pdfs_2*ctr_2 + 24*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_319 = _data_pdfs + _stride_pdfs_2*ctr_2 + 19*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_325 = _data_pdfs + _stride_pdfs_2*ctr_2 + 25*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_320 = _data_pdfs + _stride_pdfs_2*ctr_2 + 20*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
-      double * RESTRICT  _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_322 = _data_pdfs + _stride_pdfs_2*ctr_2 + 22*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1)
       {
-         double * RESTRICT  _data_pdfs_20_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_313;
-         double * RESTRICT  _data_pdfs_20_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_36;
-         double * RESTRICT  _data_pdfs_20_326_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_326;
-         double * RESTRICT  _data_pdfs_20_323_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_323;
-         double * RESTRICT  _data_pdfs_20_310_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_310;
-         double * RESTRICT  _data_pdfs_20_316_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_316;
-         double * RESTRICT  _data_pdfs_20_312_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_312;
-         double * RESTRICT  _data_pdfs_20_321_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_321;
-         double * RESTRICT  _data_pdfs_20_311_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_311;
-         double * RESTRICT  _data_pdfs_20_324_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_324;
-         double * RESTRICT  _data_pdfs_20_37_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_37;
-         double * RESTRICT  _data_pdfs_20_319_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_319;
-         double * RESTRICT  _data_pdfs_20_31_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_31;
-         double * RESTRICT  _data_pdfs_20_38_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_38;
-         double * RESTRICT  _data_pdfs_20_325_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_325;
-         double * RESTRICT  _data_pdfs_20_32_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_32;
-         double * RESTRICT  _data_pdfs_20_320_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_320;
-         double * RESTRICT  _data_pdfs_20_39_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_39;
-         double * RESTRICT  _data_pdfs_20_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_35;
-         double * RESTRICT  _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
-         double * RESTRICT  _data_pdfs_20_315_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_315;
-         double * RESTRICT  _data_pdfs_20_322_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_322;
-         double * RESTRICT  _data_pdfs_20_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_317;
-         double * RESTRICT  _data_pdfs_20_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_314;
-         double * RESTRICT  _data_pdfs_20_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_318;
-         double * RESTRICT  _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
-         double * RESTRICT  _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
          for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1)
          {
-            const double xi_1 = _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0];
-            const double xi_2 = _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0];
-            const double xi_3 = _data_pdfs_20_326_10[_stride_pdfs_0*ctr_0];
-            const double xi_4 = _data_pdfs_20_323_10[_stride_pdfs_0*ctr_0];
-            const double xi_5 = _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0];
-            const double xi_6 = _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0];
-            const double xi_7 = _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0];
-            const double xi_8 = _data_pdfs_20_321_10[_stride_pdfs_0*ctr_0];
-            const double xi_9 = _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0];
-            const double xi_10 = _data_pdfs_20_324_10[_stride_pdfs_0*ctr_0];
-            const double xi_11 = _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0];
-            const double xi_12 = _data_pdfs_20_319_10[_stride_pdfs_0*ctr_0];
-            const double xi_13 = _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0];
-            const double xi_14 = _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0];
-            const double xi_15 = _data_pdfs_20_325_10[_stride_pdfs_0*ctr_0];
-            const double xi_16 = _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0];
-            const double xi_17 = _data_pdfs_20_320_10[_stride_pdfs_0*ctr_0];
-            const double xi_18 = _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0];
-            const double xi_19 = _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0];
-            const double xi_20 = _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0];
-            const double xi_21 = _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0];
-            const double xi_22 = _data_pdfs_20_322_10[_stride_pdfs_0*ctr_0];
-            const double xi_23 = _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0];
-            const double xi_24 = _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0];
-            const double xi_25 = _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0];
-            const double xi_26 = _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0];
-            const double xi_27 = _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0];
-            const double vel0Term = xi_12 + xi_14 + xi_15 + xi_24 + xi_25 + xi_26 + xi_4 + xi_5 + xi_8;
-            const double vel1Term = xi_10 + xi_11 + xi_13 + xi_17 + xi_21 + xi_9;
-            const double vel2Term = xi_1 + xi_19 + xi_22 + xi_7;
-            const double delta_rho = vel0Term + vel1Term + vel2Term + xi_16 + xi_18 + xi_2 + xi_20 + xi_23 + xi_27 + xi_3 + xi_6;
-            const double u_0 = vel0Term + xi_1*-1.0 + xi_10*-1.0 + xi_11*-1.0 + xi_17*-1.0 + xi_18*-1.0 + xi_22*-1.0 + xi_23*-1.0 + xi_27*-1.0 + xi_3*-1.0;
-            const double u_1 = vel1Term + xi_12 + xi_14 + xi_15*-1.0 + xi_16*-1.0 + xi_18*-1.0 + xi_22*-1.0 + xi_3*-1.0 + xi_4 + xi_5*-1.0 + xi_6*-1.0 + xi_7*-1.0 + xi_8*-1.0;
-            const double u_2 = vel2Term + xi_10*-1.0 + xi_12 + xi_15*-1.0 + xi_17 + xi_2*-1.0 + xi_21*-1.0 + xi_23*-1.0 + xi_24 + xi_25*-1.0 + xi_3*-1.0 + xi_4*-1.0 + xi_6*-1.0 + xi_8 + xi_9;
-            const double u0Mu1 = u_0 + u_1*-1.0;
+            const double xi_1 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 23*_stride_pdfs_3];
+            const double xi_2 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3];
+            const double xi_3 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2];
+            const double xi_4 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3];
+            const double xi_5 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3];
+            const double xi_6 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3];
+            const double xi_7 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3];
+            const double xi_8 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 26*_stride_pdfs_3];
+            const double xi_9 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3];
+            const double xi_10 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 19*_stride_pdfs_3];
+            const double xi_11 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3];
+            const double xi_12 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 20*_stride_pdfs_3];
+            const double xi_13 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3];
+            const double xi_14 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3];
+            const double xi_15 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 22*_stride_pdfs_3];
+            const double xi_16 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 25*_stride_pdfs_3];
+            const double xi_17 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3];
+            const double xi_18 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3];
+            const double xi_19 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3];
+            const double xi_20 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3];
+            const double xi_21 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3];
+            const double xi_22 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3];
+            const double xi_23 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 24*_stride_pdfs_3];
+            const double xi_24 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 21*_stride_pdfs_3];
+            const double xi_25 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3];
+            const double xi_26 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3];
+            const double xi_27 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3];
+            const double vel0Term = xi_1 + xi_10 + xi_13 + xi_14 + xi_16 + xi_17 + xi_19 + xi_24 + xi_25;
+            const double vel1Term = xi_12 + xi_22 + xi_23 + xi_27 + xi_4 + xi_9;
+            const double vel2Term = xi_15 + xi_5 + xi_6 + xi_7;
+            const double delta_rho = vel0Term + vel1Term + vel2Term + xi_11 + xi_18 + xi_2 + xi_20 + xi_21 + xi_26 + xi_3 + xi_8;
+            const double u_0 = vel0Term - xi_11 - xi_12 - xi_15 - xi_20 - xi_23 - xi_26 - xi_27 - xi_5 - xi_8;
+            const double u_1 = vel1Term + xi_1 + xi_10 - xi_15 - xi_16 + xi_19 - xi_2 - xi_20 - xi_21 - xi_24 - xi_25 - xi_7 - xi_8;
+            const double u_2 = vel2Term - xi_1 + xi_10 - xi_11 + xi_12 - xi_13 - xi_16 + xi_17 - xi_18 - xi_2 - xi_22 - xi_23 + xi_24 - xi_8 + xi_9;
+            const double u0Mu1 = u_0 - u_1;
             const double u0Pu1 = u_0 + u_1;
             const double u1Pu2 = u_1 + u_2;
-            const double u1Mu2 = u_1 + u_2*-1.0;
-            const double u0Mu2 = u_0 + u_2*-1.0;
+            const double u1Mu2 = u_1 - u_2;
+            const double u0Mu2 = u_0 - u_2;
             const double u0Pu2 = u_0 + u_2;
             const double f_eq_common = delta_rho - 1.5*(u_0*u_0) - 1.5*(u_1*u_1) - 1.5*(u_2*u_2);
-            _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.29629629629629628 + xi_20*-1.0) + xi_20;
-            _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_1*0.22222222222222221 + xi_13*-1.0 + 0.33333333333333331*(u_1*u_1)) + xi_13;
-            _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_1*-0.22222222222222221 + xi_16*-1.0 + 0.33333333333333331*(u_1*u_1)) + xi_16;
-            _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_0*-0.22222222222222221 + xi_27*-1.0 + 0.33333333333333331*(u_0*u_0)) + xi_27;
-            _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_0*0.22222222222222221 + xi_26*-1.0 + 0.33333333333333331*(u_0*u_0)) + xi_26;
-            _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_2*0.22222222222222221 + xi_19*-1.0 + 0.33333333333333331*(u_2*u_2)) + xi_19;
-            _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_2*-0.22222222222222221 + xi_2*-1.0 + 0.33333333333333331*(u_2*u_2)) + xi_2;
-            _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Mu1*-0.055555555555555552 + xi_11*-1.0 + 0.083333333333333329*(u0Mu1*u0Mu1)) + xi_11;
-            _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Pu1*0.055555555555555552 + xi_14*-1.0 + 0.083333333333333329*(u0Pu1*u0Pu1)) + xi_14;
-            _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Pu1*-0.055555555555555552 + xi_18*-1.0 + 0.083333333333333329*(u0Pu1*u0Pu1)) + xi_18;
-            _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Mu1*0.055555555555555552 + xi_5*-1.0 + 0.083333333333333329*(u0Mu1*u0Mu1)) + xi_5;
-            _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u1Pu2*0.055555555555555552 + xi_9*-1.0 + 0.083333333333333329*(u1Pu2*u1Pu2)) + xi_9;
-            _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u1Mu2*-0.055555555555555552 + xi_7*-1.0 + 0.083333333333333329*(u1Mu2*u1Mu2)) + xi_7;
-            _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Mu2*-0.055555555555555552 + xi_1*-1.0 + 0.083333333333333329*(u0Mu2*u0Mu2)) + xi_1;
-            _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Pu2*0.055555555555555552 + xi_24*-1.0 + 0.083333333333333329*(u0Pu2*u0Pu2)) + xi_24;
-            _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u1Mu2*0.055555555555555552 + xi_21*-1.0 + 0.083333333333333329*(u1Mu2*u1Mu2)) + xi_21;
-            _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u1Pu2*-0.055555555555555552 + xi_6*-1.0 + 0.083333333333333329*(u1Pu2*u1Pu2)) + xi_6;
-            _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Pu2*-0.055555555555555552 + xi_23*-1.0 + 0.083333333333333329*(u0Pu2*u0Pu2)) + xi_23;
-            _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Mu2*0.055555555555555552 + xi_25*-1.0 + 0.083333333333333329*(u0Mu2*u0Mu2)) + xi_25;
-            _data_pdfs_20_319_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*0.013888888888888888 + u_2*0.013888888888888888 + xi_12*-1.0 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + xi_12;
-            _data_pdfs_20_320_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*-0.013888888888888888 + u_2*0.013888888888888888 + xi_17*-1.0 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + xi_17;
-            _data_pdfs_20_321_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*0.013888888888888888 + u_2*0.013888888888888888 + xi_8*-1.0 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)) + xi_8;
-            _data_pdfs_20_322_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*-0.013888888888888888 + u_2*0.013888888888888888 + xi_22*-1.0 + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2)) + xi_22;
-            _data_pdfs_20_323_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*0.013888888888888888 + u_2*-0.013888888888888888 + xi_4*-1.0 + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2)) + xi_4;
-            _data_pdfs_20_324_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*-0.013888888888888888 + u_2*-0.013888888888888888 + xi_10*-1.0 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)) + xi_10;
-            _data_pdfs_20_325_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*0.013888888888888888 + u_2*-0.013888888888888888 + xi_15*-1.0 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + xi_15;
-            _data_pdfs_20_326_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*-0.013888888888888888 + u_2*-0.013888888888888888 + xi_3*-1.0 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + xi_3;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2] = omega*(f_eq_common*0.29629629629629628 - xi_3) + xi_3;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3] = omega*(f_eq_common*0.07407407407407407 + u_1*0.22222222222222221 - xi_4 + 0.33333333333333331*(u_1*u_1)) + xi_4;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] = omega*(f_eq_common*0.07407407407407407 + u_1*-0.22222222222222221 - xi_21 + 0.33333333333333331*(u_1*u_1)) + xi_21;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] = omega*(f_eq_common*0.07407407407407407 + u_0*-0.22222222222222221 - xi_26 + 0.33333333333333331*(u_0*u_0)) + xi_26;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3] = omega*(f_eq_common*0.07407407407407407 + u_0*0.22222222222222221 - xi_14 + 0.33333333333333331*(u_0*u_0)) + xi_14;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3] = omega*(f_eq_common*0.07407407407407407 + u_2*0.22222222222222221 - xi_6 + 0.33333333333333331*(u_2*u_2)) + xi_6;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3] = omega*(f_eq_common*0.07407407407407407 + u_2*-0.22222222222222221 - xi_18 + 0.33333333333333331*(u_2*u_2)) + xi_18;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u0Mu1*-0.055555555555555552 - xi_27 + 0.083333333333333329*(u0Mu1*u0Mu1)) + xi_27;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u0Pu1*0.055555555555555552 - xi_19 + 0.083333333333333329*(u0Pu1*u0Pu1)) + xi_19;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u0Pu1*-0.055555555555555552 - xi_20 + 0.083333333333333329*(u0Pu1*u0Pu1)) + xi_20;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u0Mu1*0.055555555555555552 - xi_25 + 0.083333333333333329*(u0Mu1*u0Mu1)) + xi_25;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u1Pu2*0.055555555555555552 - xi_9 + 0.083333333333333329*(u1Pu2*u1Pu2)) + xi_9;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u1Mu2*-0.055555555555555552 - xi_7 + 0.083333333333333329*(u1Mu2*u1Mu2)) + xi_7;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u0Mu2*-0.055555555555555552 - xi_5 + 0.083333333333333329*(u0Mu2*u0Mu2)) + xi_5;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u0Pu2*0.055555555555555552 - xi_17 + 0.083333333333333329*(u0Pu2*u0Pu2)) + xi_17;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u1Mu2*0.055555555555555552 - xi_22 + 0.083333333333333329*(u1Mu2*u1Mu2)) + xi_22;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u1Pu2*-0.055555555555555552 - xi_2 + 0.083333333333333329*(u1Pu2*u1Pu2)) + xi_2;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u0Pu2*-0.055555555555555552 - xi_11 + 0.083333333333333329*(u0Pu2*u0Pu2)) + xi_11;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u0Mu2*0.055555555555555552 - xi_13 + 0.083333333333333329*(u0Mu2*u0Mu2)) + xi_13;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 19*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*0.013888888888888888 + u_2*0.013888888888888888 - xi_10 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + xi_10;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 20*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*-0.013888888888888888 + u_2*0.013888888888888888 - xi_12 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + xi_12;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 21*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*0.013888888888888888 + u_2*0.013888888888888888 - xi_24 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)) + xi_24;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 22*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*-0.013888888888888888 + u_2*0.013888888888888888 - xi_15 + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2)) + xi_15;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 23*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*0.013888888888888888 + u_2*-0.013888888888888888 - xi_1 + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2)) + xi_1;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 24*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*-0.013888888888888888 + u_2*-0.013888888888888888 - xi_23 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)) + xi_23;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 25*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*0.013888888888888888 + u_2*-0.013888888888888888 - xi_16 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + xi_16;
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 26*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*-0.013888888888888888 + u_2*-0.013888888888888888 - xi_8 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + xi_8;
          }
       }
    }
@@ -344,172 +182,64 @@ static FUNC_PREFIX void d3q27srt_kernel_stream(double * RESTRICT const _data_pdf
 {
    for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
-      double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_35 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_311 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_312 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_313 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_314 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_319 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_320 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_321 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_322 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_323 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_324 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_325 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_326 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_tmp_20_30 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2;
-      double * RESTRICT  _data_pdfs_tmp_20_31 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_32 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_33 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_34 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_35 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_36 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_37 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_38 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_39 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_310 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_311 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_312 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_313 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_314 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_315 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_316 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_317 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_318 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_319 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 19*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_320 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 20*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_321 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 21*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_322 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 22*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_323 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 23*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_324 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 24*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_325 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 25*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_326 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 26*_stride_pdfs_tmp_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
-         double * RESTRICT _data_pdfs_20_31_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_31;
-         double * RESTRICT _data_pdfs_20_32_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_32;
-         double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
-         double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
-         double * RESTRICT _data_pdfs_2m1_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_35;
-         double * RESTRICT _data_pdfs_21_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_36;
-         double * RESTRICT _data_pdfs_20_37_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_37;
-         double * RESTRICT _data_pdfs_20_38_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_38;
-         double * RESTRICT _data_pdfs_20_39_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_39;
-         double * RESTRICT _data_pdfs_20_310_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_310;
-         double * RESTRICT _data_pdfs_2m1_311_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_311;
-         double * RESTRICT _data_pdfs_2m1_312_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_312;
-         double * RESTRICT _data_pdfs_2m1_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_313;
-         double * RESTRICT _data_pdfs_2m1_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_314;
-         double * RESTRICT _data_pdfs_21_315_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_315;
-         double * RESTRICT _data_pdfs_21_316_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_316;
-         double * RESTRICT _data_pdfs_21_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_317;
-         double * RESTRICT _data_pdfs_21_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_318;
-         double * RESTRICT _data_pdfs_2m1_319_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_319;
-         double * RESTRICT _data_pdfs_2m1_320_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_320;
-         double * RESTRICT _data_pdfs_2m1_321_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_321;
-         double * RESTRICT _data_pdfs_2m1_322_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_322;
-         double * RESTRICT _data_pdfs_21_323_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_323;
-         double * RESTRICT _data_pdfs_21_324_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_324;
-         double * RESTRICT _data_pdfs_21_325_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_325;
-         double * RESTRICT _data_pdfs_21_326_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_326;
-         double * RESTRICT  _data_pdfs_tmp_20_30_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_30;
-         double * RESTRICT  _data_pdfs_tmp_20_31_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_31;
-         double * RESTRICT  _data_pdfs_tmp_20_32_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_32;
-         double * RESTRICT  _data_pdfs_tmp_20_33_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_33;
-         double * RESTRICT  _data_pdfs_tmp_20_34_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_34;
-         double * RESTRICT  _data_pdfs_tmp_20_35_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_35;
-         double * RESTRICT  _data_pdfs_tmp_20_36_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_36;
-         double * RESTRICT  _data_pdfs_tmp_20_37_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_37;
-         double * RESTRICT  _data_pdfs_tmp_20_38_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_38;
-         double * RESTRICT  _data_pdfs_tmp_20_39_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_39;
-         double * RESTRICT  _data_pdfs_tmp_20_310_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_310;
-         double * RESTRICT  _data_pdfs_tmp_20_311_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_311;
-         double * RESTRICT  _data_pdfs_tmp_20_312_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_312;
-         double * RESTRICT  _data_pdfs_tmp_20_313_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_313;
-         double * RESTRICT  _data_pdfs_tmp_20_314_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_314;
-         double * RESTRICT  _data_pdfs_tmp_20_315_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_315;
-         double * RESTRICT  _data_pdfs_tmp_20_316_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_316;
-         double * RESTRICT  _data_pdfs_tmp_20_317_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_317;
-         double * RESTRICT  _data_pdfs_tmp_20_318_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_318;
-         double * RESTRICT  _data_pdfs_tmp_20_319_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_319;
-         double * RESTRICT  _data_pdfs_tmp_20_320_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_320;
-         double * RESTRICT  _data_pdfs_tmp_20_321_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_321;
-         double * RESTRICT  _data_pdfs_tmp_20_322_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_322;
-         double * RESTRICT  _data_pdfs_tmp_20_323_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_323;
-         double * RESTRICT  _data_pdfs_tmp_20_324_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_324;
-         double * RESTRICT  _data_pdfs_tmp_20_325_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_325;
-         double * RESTRICT  _data_pdfs_tmp_20_326_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_326;
          for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1)
          {
-            const double streamed_0 = _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0];
-            const double streamed_1 = _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0];
-            const double streamed_2 = _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0];
-            const double streamed_3 = _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            const double streamed_4 = _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double streamed_5 = _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0];
-            const double streamed_6 = _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0];
-            const double streamed_7 = _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            const double streamed_8 = _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double streamed_9 = _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            const double streamed_10 = _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double streamed_11 = _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0];
-            const double streamed_12 = _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0];
-            const double streamed_13 = _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            const double streamed_14 = _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double streamed_15 = _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0];
-            const double streamed_16 = _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0];
-            const double streamed_17 = _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            const double streamed_18 = _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double streamed_19 = _data_pdfs_2m1_319_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double streamed_20 = _data_pdfs_2m1_320_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            const double streamed_21 = _data_pdfs_2m1_321_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double streamed_22 = _data_pdfs_2m1_322_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            const double streamed_23 = _data_pdfs_21_323_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double streamed_24 = _data_pdfs_21_324_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            const double streamed_25 = _data_pdfs_21_325_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double streamed_26 = _data_pdfs_21_326_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            _data_pdfs_tmp_20_30_10[_stride_pdfs_tmp_0*ctr_0] = streamed_0;
-            _data_pdfs_tmp_20_31_10[_stride_pdfs_tmp_0*ctr_0] = streamed_1;
-            _data_pdfs_tmp_20_32_10[_stride_pdfs_tmp_0*ctr_0] = streamed_2;
-            _data_pdfs_tmp_20_33_10[_stride_pdfs_tmp_0*ctr_0] = streamed_3;
-            _data_pdfs_tmp_20_34_10[_stride_pdfs_tmp_0*ctr_0] = streamed_4;
-            _data_pdfs_tmp_20_35_10[_stride_pdfs_tmp_0*ctr_0] = streamed_5;
-            _data_pdfs_tmp_20_36_10[_stride_pdfs_tmp_0*ctr_0] = streamed_6;
-            _data_pdfs_tmp_20_37_10[_stride_pdfs_tmp_0*ctr_0] = streamed_7;
-            _data_pdfs_tmp_20_38_10[_stride_pdfs_tmp_0*ctr_0] = streamed_8;
-            _data_pdfs_tmp_20_39_10[_stride_pdfs_tmp_0*ctr_0] = streamed_9;
-            _data_pdfs_tmp_20_310_10[_stride_pdfs_tmp_0*ctr_0] = streamed_10;
-            _data_pdfs_tmp_20_311_10[_stride_pdfs_tmp_0*ctr_0] = streamed_11;
-            _data_pdfs_tmp_20_312_10[_stride_pdfs_tmp_0*ctr_0] = streamed_12;
-            _data_pdfs_tmp_20_313_10[_stride_pdfs_tmp_0*ctr_0] = streamed_13;
-            _data_pdfs_tmp_20_314_10[_stride_pdfs_tmp_0*ctr_0] = streamed_14;
-            _data_pdfs_tmp_20_315_10[_stride_pdfs_tmp_0*ctr_0] = streamed_15;
-            _data_pdfs_tmp_20_316_10[_stride_pdfs_tmp_0*ctr_0] = streamed_16;
-            _data_pdfs_tmp_20_317_10[_stride_pdfs_tmp_0*ctr_0] = streamed_17;
-            _data_pdfs_tmp_20_318_10[_stride_pdfs_tmp_0*ctr_0] = streamed_18;
-            _data_pdfs_tmp_20_319_10[_stride_pdfs_tmp_0*ctr_0] = streamed_19;
-            _data_pdfs_tmp_20_320_10[_stride_pdfs_tmp_0*ctr_0] = streamed_20;
-            _data_pdfs_tmp_20_321_10[_stride_pdfs_tmp_0*ctr_0] = streamed_21;
-            _data_pdfs_tmp_20_322_10[_stride_pdfs_tmp_0*ctr_0] = streamed_22;
-            _data_pdfs_tmp_20_323_10[_stride_pdfs_tmp_0*ctr_0] = streamed_23;
-            _data_pdfs_tmp_20_324_10[_stride_pdfs_tmp_0*ctr_0] = streamed_24;
-            _data_pdfs_tmp_20_325_10[_stride_pdfs_tmp_0*ctr_0] = streamed_25;
-            _data_pdfs_tmp_20_326_10[_stride_pdfs_tmp_0*ctr_0] = streamed_26;
+            const double streamed_0 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2];
+            const double streamed_1 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3];
+            const double streamed_2 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3];
+            const double streamed_3 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3];
+            const double streamed_4 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3];
+            const double streamed_5 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3];
+            const double streamed_6 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3];
+            const double streamed_7 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3];
+            const double streamed_8 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3];
+            const double streamed_9 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3];
+            const double streamed_10 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3];
+            const double streamed_11 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3];
+            const double streamed_12 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3];
+            const double streamed_13 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3];
+            const double streamed_14 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3];
+            const double streamed_15 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3];
+            const double streamed_16 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3];
+            const double streamed_17 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3];
+            const double streamed_18 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3];
+            const double streamed_19 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3];
+            const double streamed_20 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3];
+            const double streamed_21 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3];
+            const double streamed_22 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3];
+            const double streamed_23 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3];
+            const double streamed_24 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3];
+            const double streamed_25 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3];
+            const double streamed_26 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2] = streamed_0;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3] = streamed_1;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3] = streamed_2;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3] = streamed_3;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3] = streamed_4;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3] = streamed_5;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3] = streamed_6;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3] = streamed_7;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3] = streamed_8;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3] = streamed_9;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3] = streamed_10;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3] = streamed_11;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3] = streamed_12;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3] = streamed_13;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3] = streamed_14;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3] = streamed_15;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3] = streamed_16;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3] = streamed_17;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3] = streamed_18;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 19*_stride_pdfs_tmp_3] = streamed_19;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 20*_stride_pdfs_tmp_3] = streamed_20;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 21*_stride_pdfs_tmp_3] = streamed_21;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 22*_stride_pdfs_tmp_3] = streamed_22;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 23*_stride_pdfs_tmp_3] = streamed_23;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 24*_stride_pdfs_tmp_3] = streamed_24;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 25*_stride_pdfs_tmp_3] = streamed_25;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 26*_stride_pdfs_tmp_3] = streamed_26;
          }
       }
    }
@@ -522,172 +252,64 @@ static FUNC_PREFIX void d3q27srt_kernel_streamOnlyNoAdvancement(double * RESTRIC
 {
    for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
-      double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_35 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_311 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_312 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_313 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_314 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_319 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_320 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_321 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_2m1_322 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_323 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_324 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_325 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_21_326 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_tmp_20_30 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2;
-      double * RESTRICT  _data_pdfs_tmp_20_31 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_32 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_33 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_34 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_35 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_36 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_37 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_38 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_39 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_310 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_311 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_312 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_313 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_314 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_315 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_316 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_317 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_318 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_319 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 19*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_320 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 20*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_321 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 21*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_322 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 22*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_323 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 23*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_324 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 24*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_325 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 25*_stride_pdfs_tmp_3;
-      double * RESTRICT  _data_pdfs_tmp_20_326 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 26*_stride_pdfs_tmp_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
-         double * RESTRICT _data_pdfs_20_31_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_31;
-         double * RESTRICT _data_pdfs_20_32_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_32;
-         double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
-         double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
-         double * RESTRICT _data_pdfs_2m1_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_35;
-         double * RESTRICT _data_pdfs_21_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_36;
-         double * RESTRICT _data_pdfs_20_37_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_37;
-         double * RESTRICT _data_pdfs_20_38_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_38;
-         double * RESTRICT _data_pdfs_20_39_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_39;
-         double * RESTRICT _data_pdfs_20_310_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_310;
-         double * RESTRICT _data_pdfs_2m1_311_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_311;
-         double * RESTRICT _data_pdfs_2m1_312_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_312;
-         double * RESTRICT _data_pdfs_2m1_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_313;
-         double * RESTRICT _data_pdfs_2m1_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_314;
-         double * RESTRICT _data_pdfs_21_315_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_315;
-         double * RESTRICT _data_pdfs_21_316_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_316;
-         double * RESTRICT _data_pdfs_21_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_317;
-         double * RESTRICT _data_pdfs_21_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_318;
-         double * RESTRICT _data_pdfs_2m1_319_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_319;
-         double * RESTRICT _data_pdfs_2m1_320_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_320;
-         double * RESTRICT _data_pdfs_2m1_321_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_321;
-         double * RESTRICT _data_pdfs_2m1_322_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_322;
-         double * RESTRICT _data_pdfs_21_323_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_323;
-         double * RESTRICT _data_pdfs_21_324_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_324;
-         double * RESTRICT _data_pdfs_21_325_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_325;
-         double * RESTRICT _data_pdfs_21_326_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_326;
-         double * RESTRICT  _data_pdfs_tmp_20_30_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_30;
-         double * RESTRICT  _data_pdfs_tmp_20_31_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_31;
-         double * RESTRICT  _data_pdfs_tmp_20_32_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_32;
-         double * RESTRICT  _data_pdfs_tmp_20_33_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_33;
-         double * RESTRICT  _data_pdfs_tmp_20_34_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_34;
-         double * RESTRICT  _data_pdfs_tmp_20_35_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_35;
-         double * RESTRICT  _data_pdfs_tmp_20_36_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_36;
-         double * RESTRICT  _data_pdfs_tmp_20_37_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_37;
-         double * RESTRICT  _data_pdfs_tmp_20_38_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_38;
-         double * RESTRICT  _data_pdfs_tmp_20_39_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_39;
-         double * RESTRICT  _data_pdfs_tmp_20_310_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_310;
-         double * RESTRICT  _data_pdfs_tmp_20_311_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_311;
-         double * RESTRICT  _data_pdfs_tmp_20_312_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_312;
-         double * RESTRICT  _data_pdfs_tmp_20_313_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_313;
-         double * RESTRICT  _data_pdfs_tmp_20_314_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_314;
-         double * RESTRICT  _data_pdfs_tmp_20_315_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_315;
-         double * RESTRICT  _data_pdfs_tmp_20_316_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_316;
-         double * RESTRICT  _data_pdfs_tmp_20_317_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_317;
-         double * RESTRICT  _data_pdfs_tmp_20_318_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_318;
-         double * RESTRICT  _data_pdfs_tmp_20_319_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_319;
-         double * RESTRICT  _data_pdfs_tmp_20_320_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_320;
-         double * RESTRICT  _data_pdfs_tmp_20_321_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_321;
-         double * RESTRICT  _data_pdfs_tmp_20_322_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_322;
-         double * RESTRICT  _data_pdfs_tmp_20_323_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_323;
-         double * RESTRICT  _data_pdfs_tmp_20_324_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_324;
-         double * RESTRICT  _data_pdfs_tmp_20_325_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_325;
-         double * RESTRICT  _data_pdfs_tmp_20_326_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_326;
          for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1)
          {
-            const double streamed_0 = _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0];
-            const double streamed_1 = _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0];
-            const double streamed_2 = _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0];
-            const double streamed_3 = _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            const double streamed_4 = _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double streamed_5 = _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0];
-            const double streamed_6 = _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0];
-            const double streamed_7 = _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            const double streamed_8 = _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double streamed_9 = _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            const double streamed_10 = _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double streamed_11 = _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0];
-            const double streamed_12 = _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0];
-            const double streamed_13 = _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            const double streamed_14 = _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double streamed_15 = _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0];
-            const double streamed_16 = _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0];
-            const double streamed_17 = _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            const double streamed_18 = _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double streamed_19 = _data_pdfs_2m1_319_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double streamed_20 = _data_pdfs_2m1_320_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            const double streamed_21 = _data_pdfs_2m1_321_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double streamed_22 = _data_pdfs_2m1_322_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            const double streamed_23 = _data_pdfs_21_323_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double streamed_24 = _data_pdfs_21_324_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            const double streamed_25 = _data_pdfs_21_325_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double streamed_26 = _data_pdfs_21_326_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            _data_pdfs_tmp_20_30_10[_stride_pdfs_tmp_0*ctr_0] = streamed_0;
-            _data_pdfs_tmp_20_31_10[_stride_pdfs_tmp_0*ctr_0] = streamed_1;
-            _data_pdfs_tmp_20_32_10[_stride_pdfs_tmp_0*ctr_0] = streamed_2;
-            _data_pdfs_tmp_20_33_10[_stride_pdfs_tmp_0*ctr_0] = streamed_3;
-            _data_pdfs_tmp_20_34_10[_stride_pdfs_tmp_0*ctr_0] = streamed_4;
-            _data_pdfs_tmp_20_35_10[_stride_pdfs_tmp_0*ctr_0] = streamed_5;
-            _data_pdfs_tmp_20_36_10[_stride_pdfs_tmp_0*ctr_0] = streamed_6;
-            _data_pdfs_tmp_20_37_10[_stride_pdfs_tmp_0*ctr_0] = streamed_7;
-            _data_pdfs_tmp_20_38_10[_stride_pdfs_tmp_0*ctr_0] = streamed_8;
-            _data_pdfs_tmp_20_39_10[_stride_pdfs_tmp_0*ctr_0] = streamed_9;
-            _data_pdfs_tmp_20_310_10[_stride_pdfs_tmp_0*ctr_0] = streamed_10;
-            _data_pdfs_tmp_20_311_10[_stride_pdfs_tmp_0*ctr_0] = streamed_11;
-            _data_pdfs_tmp_20_312_10[_stride_pdfs_tmp_0*ctr_0] = streamed_12;
-            _data_pdfs_tmp_20_313_10[_stride_pdfs_tmp_0*ctr_0] = streamed_13;
-            _data_pdfs_tmp_20_314_10[_stride_pdfs_tmp_0*ctr_0] = streamed_14;
-            _data_pdfs_tmp_20_315_10[_stride_pdfs_tmp_0*ctr_0] = streamed_15;
-            _data_pdfs_tmp_20_316_10[_stride_pdfs_tmp_0*ctr_0] = streamed_16;
-            _data_pdfs_tmp_20_317_10[_stride_pdfs_tmp_0*ctr_0] = streamed_17;
-            _data_pdfs_tmp_20_318_10[_stride_pdfs_tmp_0*ctr_0] = streamed_18;
-            _data_pdfs_tmp_20_319_10[_stride_pdfs_tmp_0*ctr_0] = streamed_19;
-            _data_pdfs_tmp_20_320_10[_stride_pdfs_tmp_0*ctr_0] = streamed_20;
-            _data_pdfs_tmp_20_321_10[_stride_pdfs_tmp_0*ctr_0] = streamed_21;
-            _data_pdfs_tmp_20_322_10[_stride_pdfs_tmp_0*ctr_0] = streamed_22;
-            _data_pdfs_tmp_20_323_10[_stride_pdfs_tmp_0*ctr_0] = streamed_23;
-            _data_pdfs_tmp_20_324_10[_stride_pdfs_tmp_0*ctr_0] = streamed_24;
-            _data_pdfs_tmp_20_325_10[_stride_pdfs_tmp_0*ctr_0] = streamed_25;
-            _data_pdfs_tmp_20_326_10[_stride_pdfs_tmp_0*ctr_0] = streamed_26;
+            const double streamed_0 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2];
+            const double streamed_1 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3];
+            const double streamed_2 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3];
+            const double streamed_3 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3];
+            const double streamed_4 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3];
+            const double streamed_5 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3];
+            const double streamed_6 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3];
+            const double streamed_7 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3];
+            const double streamed_8 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3];
+            const double streamed_9 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3];
+            const double streamed_10 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3];
+            const double streamed_11 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3];
+            const double streamed_12 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3];
+            const double streamed_13 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3];
+            const double streamed_14 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3];
+            const double streamed_15 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3];
+            const double streamed_16 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3];
+            const double streamed_17 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3];
+            const double streamed_18 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3];
+            const double streamed_19 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3];
+            const double streamed_20 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3];
+            const double streamed_21 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3];
+            const double streamed_22 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3];
+            const double streamed_23 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3];
+            const double streamed_24 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3];
+            const double streamed_25 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3];
+            const double streamed_26 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3];
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2] = streamed_0;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3] = streamed_1;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3] = streamed_2;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3] = streamed_3;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3] = streamed_4;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3] = streamed_5;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3] = streamed_6;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3] = streamed_7;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3] = streamed_8;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3] = streamed_9;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3] = streamed_10;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3] = streamed_11;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3] = streamed_12;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3] = streamed_13;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3] = streamed_14;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3] = streamed_15;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3] = streamed_16;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3] = streamed_17;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3] = streamed_18;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 19*_stride_pdfs_tmp_3] = streamed_19;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 20*_stride_pdfs_tmp_3] = streamed_20;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 21*_stride_pdfs_tmp_3] = streamed_21;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 22*_stride_pdfs_tmp_3] = streamed_22;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 23*_stride_pdfs_tmp_3] = streamed_23;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 24*_stride_pdfs_tmp_3] = streamed_24;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 25*_stride_pdfs_tmp_3] = streamed_25;
+            _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 26*_stride_pdfs_tmp_3] = streamed_26;
          }
       }
    }
@@ -700,104 +322,42 @@ static FUNC_PREFIX void d3q27srt_kernel_initialise(double * RESTRICT const _data
 {
    for (int64_t ctr_2 = 0; ctr_2 < _size_density_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_density_20_30 = _data_density + _stride_density_2*ctr_2;
-      double * RESTRICT _data_velocity_20_30 = _data_velocity + _stride_velocity_2*ctr_2;
-      double * RESTRICT _data_velocity_20_31 = _data_velocity + _stride_velocity_2*ctr_2 + _stride_velocity_3;
-      double * RESTRICT _data_velocity_20_32 = _data_velocity + _stride_velocity_2*ctr_2 + 2*_stride_velocity_3;
-      double * RESTRICT  _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
-      double * RESTRICT  _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_319 = _data_pdfs + _stride_pdfs_2*ctr_2 + 19*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_320 = _data_pdfs + _stride_pdfs_2*ctr_2 + 20*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_321 = _data_pdfs + _stride_pdfs_2*ctr_2 + 21*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_322 = _data_pdfs + _stride_pdfs_2*ctr_2 + 22*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_323 = _data_pdfs + _stride_pdfs_2*ctr_2 + 23*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_324 = _data_pdfs + _stride_pdfs_2*ctr_2 + 24*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_325 = _data_pdfs + _stride_pdfs_2*ctr_2 + 25*_stride_pdfs_3;
-      double * RESTRICT  _data_pdfs_20_326 = _data_pdfs + _stride_pdfs_2*ctr_2 + 26*_stride_pdfs_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_density_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_density_20_30_10 = _stride_density_1*ctr_1 + _data_density_20_30;
-         double * RESTRICT _data_velocity_20_30_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_30;
-         double * RESTRICT _data_velocity_20_31_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_31;
-         double * RESTRICT _data_velocity_20_32_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_32;
-         double * RESTRICT  _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
-         double * RESTRICT  _data_pdfs_20_31_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_31;
-         double * RESTRICT  _data_pdfs_20_32_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_32;
-         double * RESTRICT  _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
-         double * RESTRICT  _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
-         double * RESTRICT  _data_pdfs_20_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_35;
-         double * RESTRICT  _data_pdfs_20_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_36;
-         double * RESTRICT  _data_pdfs_20_37_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_37;
-         double * RESTRICT  _data_pdfs_20_38_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_38;
-         double * RESTRICT  _data_pdfs_20_39_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_39;
-         double * RESTRICT  _data_pdfs_20_310_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_310;
-         double * RESTRICT  _data_pdfs_20_311_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_311;
-         double * RESTRICT  _data_pdfs_20_312_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_312;
-         double * RESTRICT  _data_pdfs_20_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_313;
-         double * RESTRICT  _data_pdfs_20_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_314;
-         double * RESTRICT  _data_pdfs_20_315_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_315;
-         double * RESTRICT  _data_pdfs_20_316_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_316;
-         double * RESTRICT  _data_pdfs_20_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_317;
-         double * RESTRICT  _data_pdfs_20_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_318;
-         double * RESTRICT  _data_pdfs_20_319_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_319;
-         double * RESTRICT  _data_pdfs_20_320_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_320;
-         double * RESTRICT  _data_pdfs_20_321_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_321;
-         double * RESTRICT  _data_pdfs_20_322_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_322;
-         double * RESTRICT  _data_pdfs_20_323_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_323;
-         double * RESTRICT  _data_pdfs_20_324_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_324;
-         double * RESTRICT  _data_pdfs_20_325_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_325;
-         double * RESTRICT  _data_pdfs_20_326_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_326;
          for (int64_t ctr_0 = 0; ctr_0 < _size_density_0; ctr_0 += 1)
          {
-            const double rho = _data_density_20_30_10[_stride_density_0*ctr_0];
+            const double rho = _data_density[_stride_density_0*ctr_0 + _stride_density_1*ctr_1 + _stride_density_2*ctr_2];
             const double delta_rho = rho - 1.0;
-            const double u_0 = _data_velocity_20_30_10[_stride_velocity_0*ctr_0];
-            const double u_1 = _data_velocity_20_31_10[_stride_velocity_0*ctr_0];
-            const double u_2 = _data_velocity_20_32_10[_stride_velocity_0*ctr_0];
-            _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] = delta_rho*0.29629629629629628 - 0.44444444444444442*(u_0*u_0) - 0.44444444444444442*(u_1*u_1) - 0.44444444444444442*(u_2*u_2);
-            _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0] = delta_rho*0.07407407407407407 + u_1*0.22222222222222221 - 0.1111111111111111*(u_0*u_0) - 0.1111111111111111*(u_2*u_2) + 0.22222222222222221*(u_1*u_1);
-            _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] = delta_rho*0.07407407407407407 + u_1*-0.22222222222222221 - 0.1111111111111111*(u_0*u_0) - 0.1111111111111111*(u_2*u_2) + 0.22222222222222221*(u_1*u_1);
-            _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] = delta_rho*0.07407407407407407 + u_0*-0.22222222222222221 - 0.1111111111111111*(u_1*u_1) - 0.1111111111111111*(u_2*u_2) + 0.22222222222222221*(u_0*u_0);
-            _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0] = delta_rho*0.07407407407407407 + u_0*0.22222222222222221 - 0.1111111111111111*(u_1*u_1) - 0.1111111111111111*(u_2*u_2) + 0.22222222222222221*(u_0*u_0);
-            _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0] = delta_rho*0.07407407407407407 + u_2*0.22222222222222221 - 0.1111111111111111*(u_0*u_0) - 0.1111111111111111*(u_1*u_1) + 0.22222222222222221*(u_2*u_2);
-            _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] = delta_rho*0.07407407407407407 + u_2*-0.22222222222222221 - 0.1111111111111111*(u_0*u_0) - 0.1111111111111111*(u_1*u_1) + 0.22222222222222221*(u_2*u_2);
-            _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_0*u_1*-0.16666666666666666 + u_0*-0.055555555555555552 + u_1*0.055555555555555552 - 0.027777777777777776*(u_2*u_2) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_1*u_1);
-            _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_0*u_1*0.16666666666666666 + u_0*0.055555555555555552 + u_1*0.055555555555555552 - 0.027777777777777776*(u_2*u_2) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_1*u_1);
-            _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_0*u_1*0.16666666666666666 + u_0*-0.055555555555555552 + u_1*-0.055555555555555552 - 0.027777777777777776*(u_2*u_2) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_1*u_1);
-            _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_0*u_1*-0.16666666666666666 + u_0*0.055555555555555552 + u_1*-0.055555555555555552 - 0.027777777777777776*(u_2*u_2) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_1*u_1);
-            _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_1*u_2*0.16666666666666666 + u_1*0.055555555555555552 + u_2*0.055555555555555552 - 0.027777777777777776*(u_0*u_0) + 0.055555555555555552*(u_1*u_1) + 0.055555555555555552*(u_2*u_2);
-            _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_1*u_2*-0.16666666666666666 + u_1*-0.055555555555555552 + u_2*0.055555555555555552 - 0.027777777777777776*(u_0*u_0) + 0.055555555555555552*(u_1*u_1) + 0.055555555555555552*(u_2*u_2);
-            _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_0*u_2*-0.16666666666666666 + u_0*-0.055555555555555552 + u_2*0.055555555555555552 - 0.027777777777777776*(u_1*u_1) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_2*u_2);
-            _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_0*u_2*0.16666666666666666 + u_0*0.055555555555555552 + u_2*0.055555555555555552 - 0.027777777777777776*(u_1*u_1) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_2*u_2);
-            _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_1*u_2*-0.16666666666666666 + u_1*0.055555555555555552 + u_2*-0.055555555555555552 - 0.027777777777777776*(u_0*u_0) + 0.055555555555555552*(u_1*u_1) + 0.055555555555555552*(u_2*u_2);
-            _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_1*u_2*0.16666666666666666 + u_1*-0.055555555555555552 + u_2*-0.055555555555555552 - 0.027777777777777776*(u_0*u_0) + 0.055555555555555552*(u_1*u_1) + 0.055555555555555552*(u_2*u_2);
-            _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_0*u_2*0.16666666666666666 + u_0*-0.055555555555555552 + u_2*-0.055555555555555552 - 0.027777777777777776*(u_1*u_1) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_2*u_2);
-            _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_0*u_2*-0.16666666666666666 + u_0*0.055555555555555552 + u_2*-0.055555555555555552 - 0.027777777777777776*(u_1*u_1) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_2*u_2);
-            _data_pdfs_20_319_10[_stride_pdfs_0*ctr_0] = delta_rho*0.0046296296296296294 + u_0*u_1*0.041666666666666664 + u_0*u_2*0.041666666666666664 + u_0*0.013888888888888888 + u_1*u_2*0.041666666666666664 + u_1*0.013888888888888888 + u_2*0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
-            _data_pdfs_20_320_10[_stride_pdfs_0*ctr_0] = delta_rho*0.0046296296296296294 + u_0*u_1*-0.041666666666666664 + u_0*u_2*-0.041666666666666664 + u_0*-0.013888888888888888 + u_1*u_2*0.041666666666666664 + u_1*0.013888888888888888 + u_2*0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
-            _data_pdfs_20_321_10[_stride_pdfs_0*ctr_0] = delta_rho*0.0046296296296296294 + u_0*u_1*-0.041666666666666664 + u_0*u_2*0.041666666666666664 + u_0*0.013888888888888888 + u_1*u_2*-0.041666666666666664 + u_1*-0.013888888888888888 + u_2*0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
-            _data_pdfs_20_322_10[_stride_pdfs_0*ctr_0] = delta_rho*0.0046296296296296294 + u_0*u_1*0.041666666666666664 + u_0*u_2*-0.041666666666666664 + u_0*-0.013888888888888888 + u_1*u_2*-0.041666666666666664 + u_1*-0.013888888888888888 + u_2*0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
-            _data_pdfs_20_323_10[_stride_pdfs_0*ctr_0] = delta_rho*0.0046296296296296294 + u_0*u_1*0.041666666666666664 + u_0*u_2*-0.041666666666666664 + u_0*0.013888888888888888 + u_1*u_2*-0.041666666666666664 + u_1*0.013888888888888888 + u_2*-0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
-            _data_pdfs_20_324_10[_stride_pdfs_0*ctr_0] = delta_rho*0.0046296296296296294 + u_0*u_1*-0.041666666666666664 + u_0*u_2*0.041666666666666664 + u_0*-0.013888888888888888 + u_1*u_2*-0.041666666666666664 + u_1*0.013888888888888888 + u_2*-0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
-            _data_pdfs_20_325_10[_stride_pdfs_0*ctr_0] = delta_rho*0.0046296296296296294 + u_0*u_1*-0.041666666666666664 + u_0*u_2*-0.041666666666666664 + u_0*0.013888888888888888 + u_1*u_2*0.041666666666666664 + u_1*-0.013888888888888888 + u_2*-0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
-            _data_pdfs_20_326_10[_stride_pdfs_0*ctr_0] = delta_rho*0.0046296296296296294 + u_0*u_1*0.041666666666666664 + u_0*u_2*0.041666666666666664 + u_0*-0.013888888888888888 + u_1*u_2*0.041666666666666664 + u_1*-0.013888888888888888 + u_2*-0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
+            const double u_0 = _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2];
+            const double u_1 = _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2 + _stride_velocity_3];
+            const double u_2 = _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2 + 2*_stride_velocity_3];
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2] = delta_rho*0.29629629629629628 - 0.44444444444444442*(u_0*u_0) - 0.44444444444444442*(u_1*u_1) - 0.44444444444444442*(u_2*u_2);
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3] = delta_rho*0.07407407407407407 + u_1*0.22222222222222221 - 0.1111111111111111*(u_0*u_0) - 0.1111111111111111*(u_2*u_2) + 0.22222222222222221*(u_1*u_1);
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] = delta_rho*0.07407407407407407 + u_1*-0.22222222222222221 - 0.1111111111111111*(u_0*u_0) - 0.1111111111111111*(u_2*u_2) + 0.22222222222222221*(u_1*u_1);
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] = delta_rho*0.07407407407407407 + u_0*-0.22222222222222221 - 0.1111111111111111*(u_1*u_1) - 0.1111111111111111*(u_2*u_2) + 0.22222222222222221*(u_0*u_0);
+            _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3] = delta_rho*0.07407407407407407 + u_0*0.22222222222222221 - 0.1111111111111111*(u_1*u_1) - 0.1111111111111111*(u_2*u_2) + 0.22222222222222221*(u_0*u_0);
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3] = delta_rho*0.07407407407407407 + u_2*0.22222222222222221 - 0.1111111111111111*(u_0*u_0) - 0.1111111111111111*(u_1*u_1) + 0.22222222222222221*(u_2*u_2);
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3] = delta_rho*0.07407407407407407 + u_2*-0.22222222222222221 - 0.1111111111111111*(u_0*u_0) - 0.1111111111111111*(u_1*u_1) + 0.22222222222222221*(u_2*u_2);
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_0*u_1*-0.16666666666666666 + u_0*-0.055555555555555552 + u_1*0.055555555555555552 - 0.027777777777777776*(u_2*u_2) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_1*u_1);
+            _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_0*u_1*0.16666666666666666 + u_0*0.055555555555555552 + u_1*0.055555555555555552 - 0.027777777777777776*(u_2*u_2) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_1*u_1);
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_0*u_1*0.16666666666666666 + u_0*-0.055555555555555552 + u_1*-0.055555555555555552 - 0.027777777777777776*(u_2*u_2) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_1*u_1);
+            _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_0*u_1*-0.16666666666666666 + u_0*0.055555555555555552 + u_1*-0.055555555555555552 - 0.027777777777777776*(u_2*u_2) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_1*u_1);
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_1*u_2*0.16666666666666666 + u_1*0.055555555555555552 + u_2*0.055555555555555552 - 0.027777777777777776*(u_0*u_0) + 0.055555555555555552*(u_1*u_1) + 0.055555555555555552*(u_2*u_2);
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_1*u_2*-0.16666666666666666 + u_1*-0.055555555555555552 + u_2*0.055555555555555552 - 0.027777777777777776*(u_0*u_0) + 0.055555555555555552*(u_1*u_1) + 0.055555555555555552*(u_2*u_2);
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_0*u_2*-0.16666666666666666 + u_0*-0.055555555555555552 + u_2*0.055555555555555552 - 0.027777777777777776*(u_1*u_1) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_2*u_2);
+            _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_0*u_2*0.16666666666666666 + u_0*0.055555555555555552 + u_2*0.055555555555555552 - 0.027777777777777776*(u_1*u_1) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_2*u_2);
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_1*u_2*-0.16666666666666666 + u_1*0.055555555555555552 + u_2*-0.055555555555555552 - 0.027777777777777776*(u_0*u_0) + 0.055555555555555552*(u_1*u_1) + 0.055555555555555552*(u_2*u_2);
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_1*u_2*0.16666666666666666 + u_1*-0.055555555555555552 + u_2*-0.055555555555555552 - 0.027777777777777776*(u_0*u_0) + 0.055555555555555552*(u_1*u_1) + 0.055555555555555552*(u_2*u_2);
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_0*u_2*0.16666666666666666 + u_0*-0.055555555555555552 + u_2*-0.055555555555555552 - 0.027777777777777776*(u_1*u_1) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_2*u_2);
+            _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_0*u_2*-0.16666666666666666 + u_0*0.055555555555555552 + u_2*-0.055555555555555552 - 0.027777777777777776*(u_1*u_1) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_2*u_2);
+            _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3] = delta_rho*0.0046296296296296294 + u_0*u_1*0.041666666666666664 + u_0*u_2*0.041666666666666664 + u_0*0.013888888888888888 + u_1*u_2*0.041666666666666664 + u_1*0.013888888888888888 + u_2*0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3] = delta_rho*0.0046296296296296294 + u_0*u_1*-0.041666666666666664 + u_0*u_2*-0.041666666666666664 + u_0*-0.013888888888888888 + u_1*u_2*0.041666666666666664 + u_1*0.013888888888888888 + u_2*0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
+            _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3] = delta_rho*0.0046296296296296294 + u_0*u_1*-0.041666666666666664 + u_0*u_2*0.041666666666666664 + u_0*0.013888888888888888 + u_1*u_2*-0.041666666666666664 + u_1*-0.013888888888888888 + u_2*0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3] = delta_rho*0.0046296296296296294 + u_0*u_1*0.041666666666666664 + u_0*u_2*-0.041666666666666664 + u_0*-0.013888888888888888 + u_1*u_2*-0.041666666666666664 + u_1*-0.013888888888888888 + u_2*0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
+            _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3] = delta_rho*0.0046296296296296294 + u_0*u_1*0.041666666666666664 + u_0*u_2*-0.041666666666666664 + u_0*0.013888888888888888 + u_1*u_2*-0.041666666666666664 + u_1*0.013888888888888888 + u_2*-0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3] = delta_rho*0.0046296296296296294 + u_0*u_1*-0.041666666666666664 + u_0*u_2*0.041666666666666664 + u_0*-0.013888888888888888 + u_1*u_2*-0.041666666666666664 + u_1*0.013888888888888888 + u_2*-0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
+            _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3] = delta_rho*0.0046296296296296294 + u_0*u_1*-0.041666666666666664 + u_0*u_2*-0.041666666666666664 + u_0*0.013888888888888888 + u_1*u_2*0.041666666666666664 + u_1*-0.013888888888888888 + u_2*-0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
+            _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3] = delta_rho*0.0046296296296296294 + u_0*u_1*0.041666666666666664 + u_0*u_2*0.041666666666666664 + u_0*-0.013888888888888888 + u_1*u_2*0.041666666666666664 + u_1*-0.013888888888888888 + u_2*-0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
          }
       }
    }
@@ -810,87 +370,25 @@ static FUNC_PREFIX void d3q27srt_kernel_getter(double * RESTRICT  _data_density,
 {
    for (int64_t ctr_2 = 0; ctr_2 < _size_density_2; ctr_2 += 1)
    {
-      double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_319 = _data_pdfs + _stride_pdfs_2*ctr_2 + 19*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_321 = _data_pdfs + _stride_pdfs_2*ctr_2 + 21*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_323 = _data_pdfs + _stride_pdfs_2*ctr_2 + 23*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_325 = _data_pdfs + _stride_pdfs_2*ctr_2 + 25*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_320 = _data_pdfs + _stride_pdfs_2*ctr_2 + 20*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_322 = _data_pdfs + _stride_pdfs_2*ctr_2 + 22*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_324 = _data_pdfs + _stride_pdfs_2*ctr_2 + 24*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_326 = _data_pdfs + _stride_pdfs_2*ctr_2 + 26*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
-      double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
-      double * RESTRICT _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3;
-      double * RESTRICT  _data_density_20_30 = _data_density + _stride_density_2*ctr_2;
-      double * RESTRICT  _data_velocity_20_30 = _data_velocity + _stride_velocity_2*ctr_2;
-      double * RESTRICT  _data_velocity_20_31 = _data_velocity + _stride_velocity_2*ctr_2 + _stride_velocity_3;
-      double * RESTRICT  _data_velocity_20_32 = _data_velocity + _stride_velocity_2*ctr_2 + 2*_stride_velocity_3;
       for (int64_t ctr_1 = 0; ctr_1 < _size_density_1; ctr_1 += 1)
       {
-         double * RESTRICT _data_pdfs_20_310_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_310;
-         double * RESTRICT _data_pdfs_20_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_314;
-         double * RESTRICT _data_pdfs_20_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_318;
-         double * RESTRICT _data_pdfs_20_319_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_319;
-         double * RESTRICT _data_pdfs_20_321_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_321;
-         double * RESTRICT _data_pdfs_20_323_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_323;
-         double * RESTRICT _data_pdfs_20_325_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_325;
-         double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
-         double * RESTRICT _data_pdfs_20_38_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_38;
-         double * RESTRICT _data_pdfs_20_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_313;
-         double * RESTRICT _data_pdfs_20_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_317;
-         double * RESTRICT _data_pdfs_20_320_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_320;
-         double * RESTRICT _data_pdfs_20_322_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_322;
-         double * RESTRICT _data_pdfs_20_324_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_324;
-         double * RESTRICT _data_pdfs_20_326_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_326;
-         double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
-         double * RESTRICT _data_pdfs_20_37_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_37;
-         double * RESTRICT _data_pdfs_20_39_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_39;
-         double * RESTRICT _data_pdfs_20_31_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_31;
-         double * RESTRICT _data_pdfs_20_311_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_311;
-         double * RESTRICT _data_pdfs_20_315_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_315;
-         double * RESTRICT _data_pdfs_20_312_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_312;
-         double * RESTRICT _data_pdfs_20_316_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_316;
-         double * RESTRICT _data_pdfs_20_32_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_32;
-         double * RESTRICT _data_pdfs_20_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_35;
-         double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
-         double * RESTRICT _data_pdfs_20_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_36;
-         double * RESTRICT  _data_density_20_30_10 = _stride_density_1*ctr_1 + _data_density_20_30;
-         double * RESTRICT  _data_velocity_20_30_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_30;
-         double * RESTRICT  _data_velocity_20_31_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_31;
-         double * RESTRICT  _data_velocity_20_32_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_32;
          for (int64_t ctr_0 = 0; ctr_0 < _size_density_0; ctr_0 += 1)
          {
-            const double vel0Term = _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_319_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_321_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_323_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_325_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0];
-            const double momdensity_0 = vel0Term - 1.0*_data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_320_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_322_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_324_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_326_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_37_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_39_10[_stride_pdfs_0*ctr_0];
-            const double vel1Term = _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_320_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_324_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0];
-            const double momdensity_1 = vel1Term - 1.0*_data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_321_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_322_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_325_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_326_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_39_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_319_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_323_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0];
-            const double vel2Term = _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_322_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0];
-            const double delta_rho = vel0Term + vel1Term + vel2Term + _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_326_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0];
-            const double momdensity_2 = vel2Term - 1.0*_data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_323_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_324_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_325_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_326_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_319_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_320_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_321_10[_stride_pdfs_0*ctr_0];
+            const double vel0Term = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 19*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 21*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 23*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 25*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3];
+            const double momdensity_0 = vel0Term - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 20*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 22*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 24*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 26*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3];
+            const double vel1Term = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 20*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 24*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3];
+            const double momdensity_1 = vel1Term - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 19*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 21*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 22*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 23*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 25*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 26*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3];
+            const double vel2Term = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 22*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3];
+            const double delta_rho = vel0Term + vel1Term + vel2Term + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 26*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2];
+            const double momdensity_2 = vel2Term + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 19*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 20*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 21*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 23*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 24*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 25*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 26*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3];
             const double rho = delta_rho + 1.0;
             const double u_0 = momdensity_0;
             const double u_1 = momdensity_1;
             const double u_2 = momdensity_2;
-            _data_density_20_30_10[_stride_density_0*ctr_0] = rho;
-            _data_velocity_20_30_10[_stride_velocity_0*ctr_0] = u_0;
-            _data_velocity_20_31_10[_stride_velocity_0*ctr_0] = u_1;
-            _data_velocity_20_32_10[_stride_velocity_0*ctr_0] = u_2;
+            _data_density[_stride_density_0*ctr_0 + _stride_density_1*ctr_1 + _stride_density_2*ctr_2] = rho;
+            _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2] = u_0;
+            _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2 + _stride_velocity_3] = u_1;
+            _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2 + 2*_stride_velocity_3] = u_2;
          }
       }
    }
diff --git a/src/lbm_generated/sweep_collection/D3Q27SRT.h b/src/lbm_generated/sweep_collection/D3Q27SRT.h
index eb45b71660fbf902d16cd064e2f09dadf24548d7..72ba7d41c0a4581c36d144ce5ac6d1d70c2004b5 100644
--- a/src/lbm_generated/sweep_collection/D3Q27SRT.h
+++ b/src/lbm_generated/sweep_collection/D3Q27SRT.h
@@ -59,20 +59,20 @@ namespace lbm {
 
 class D3Q27SRT
 {
-public:
-  enum Type { ALL = 0, INNER = 1, OUTER = 2 };
+ public:
+   enum Type { ALL = 0, INNER = 1, OUTER = 2 };
 
    D3Q27SRT(const shared_ptr< StructuredBlockStorage > & blocks, BlockDataID pdfsID_, BlockDataID densityID_, BlockDataID velocityID_, double omega, const Cell & outerWidth=Cell(1, 1, 1))
-     : blocks_(blocks), pdfsID(pdfsID_), densityID(densityID_), velocityID(velocityID_), omega_(omega), outerWidth_(outerWidth)
+      : blocks_(blocks), pdfsID(pdfsID_), densityID(densityID_), velocityID(velocityID_), omega_(omega), outerWidth_(outerWidth)
    {
       
 
+      validInnerOuterSplit_= true;
+
       for (auto& iBlock : *blocks)
       {
-         if (int_c(blocks->getNumberOfXCells(iBlock)) <= outerWidth_[0] * 2 ||
-             int_c(blocks->getNumberOfYCells(iBlock)) <= outerWidth_[1] * 2 ||
-             int_c(blocks->getNumberOfZCells(iBlock)) <= outerWidth_[2] * 2)
-          WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller or increase cellsPerBlock")
+         if (int_c(blocks->getNumberOfXCells(iBlock)) <= outerWidth_[0] * 2 || int_c(blocks->getNumberOfYCells(iBlock)) <= outerWidth_[1] * 2 || int_c(blocks->getNumberOfZCells(iBlock)) <= outerWidth_[2] * 2)
+            validInnerOuterSplit_ = false;
       }
    };
 
@@ -117,27 +117,33 @@ public:
 
    std::function<void (IBlock *)> streamCollide(Type type)
    {
+      if (!validInnerOuterSplit_ && type != Type::ALL)
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
       switch (type)
       {
-         case Type::INNER:
-            return [this](IBlock* block) { streamCollideInner(block); };
-         case Type::OUTER:
-            return [this](IBlock* block) { streamCollideOuter(block); };
-         default:
-            return [this](IBlock* block) { streamCollide(block); };
+      case Type::INNER:
+         return [this](IBlock* block) { streamCollideInner(block); };
+      case Type::OUTER:
+         return [this](IBlock* block) { streamCollideOuter(block); };
+      default:
+         return [this](IBlock* block) { streamCollide(block); };
       }
    }
 
    std::function<void (IBlock *)> streamCollide(Type type, const cell_idx_t ghost_layers)
    {
+      if (!validInnerOuterSplit_ && type != Type::ALL)
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
       switch (type)
       {
-         case Type::INNER:
-            return [this](IBlock* block) { streamCollideInner(block); };
-         case Type::OUTER:
-            return [this](IBlock* block) { streamCollideOuter(block); };
-         default:
-            return [this, ghost_layers](IBlock* block) { streamCollide(block, ghost_layers); };
+      case Type::INNER:
+         return [this](IBlock* block) { streamCollideInner(block); };
+      case Type::OUTER:
+         return [this](IBlock* block) { streamCollideOuter(block); };
+      default:
+         return [this, ghost_layers](IBlock* block) { streamCollide(block, ghost_layers); };
       }
    }
 
@@ -298,14 +304,14 @@ public:
          layers_.push_back(ci);
       }
 
-    
+      
       for( auto & ci: layers_ )
       {
          streamCollideCellInterval(pdfs, pdfs_tmp, omega, ci);
       }
-    
+      
 
-    pdfs->swapDataPointers(pdfs_tmp);
+      pdfs->swapDataPointers(pdfs_tmp);
 
    }
    
@@ -317,27 +323,33 @@ public:
 
    std::function<void (IBlock *)> collide(Type type)
    {
+      if (!validInnerOuterSplit_ && type != Type::ALL)
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
       switch (type)
       {
-         case Type::INNER:
-            return [this](IBlock* block) { collideInner(block); };
-         case Type::OUTER:
-            return [this](IBlock* block) { collideOuter(block); };
-         default:
-            return [this](IBlock* block) { collide(block); };
+      case Type::INNER:
+         return [this](IBlock* block) { collideInner(block); };
+      case Type::OUTER:
+         return [this](IBlock* block) { collideOuter(block); };
+      default:
+         return [this](IBlock* block) { collide(block); };
       }
    }
 
    std::function<void (IBlock *)> collide(Type type, const cell_idx_t ghost_layers)
    {
+      if (!validInnerOuterSplit_ && type != Type::ALL)
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
       switch (type)
       {
-         case Type::INNER:
-            return [this](IBlock* block) { collideInner(block); };
-         case Type::OUTER:
-            return [this](IBlock* block) { collideOuter(block); };
-         default:
-            return [this, ghost_layers](IBlock* block) { collide(block, ghost_layers); };
+      case Type::INNER:
+         return [this](IBlock* block) { collideInner(block); };
+      case Type::OUTER:
+         return [this](IBlock* block) { collideOuter(block); };
+      default:
+         return [this, ghost_layers](IBlock* block) { collide(block, ghost_layers); };
       }
    }
 
@@ -425,14 +437,14 @@ public:
          layers_.push_back(ci);
       }
 
-    
+      
       for( auto & ci: layers_ )
       {
          collideCellInterval(pdfs, omega, ci);
       }
-    
+      
 
-    
+      
    }
    
 
@@ -443,27 +455,33 @@ public:
 
    std::function<void (IBlock *)> stream(Type type)
    {
+      if (!validInnerOuterSplit_ && type != Type::ALL)
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
       switch (type)
       {
-         case Type::INNER:
-            return [this](IBlock* block) { streamInner(block); };
-         case Type::OUTER:
-            return [this](IBlock* block) { streamOuter(block); };
-         default:
-            return [this](IBlock* block) { stream(block); };
+      case Type::INNER:
+         return [this](IBlock* block) { streamInner(block); };
+      case Type::OUTER:
+         return [this](IBlock* block) { streamOuter(block); };
+      default:
+         return [this](IBlock* block) { stream(block); };
       }
    }
 
    std::function<void (IBlock *)> stream(Type type, const cell_idx_t ghost_layers)
    {
+      if (!validInnerOuterSplit_ && type != Type::ALL)
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
       switch (type)
       {
-         case Type::INNER:
-            return [this](IBlock* block) { streamInner(block); };
-         case Type::OUTER:
-            return [this](IBlock* block) { streamOuter(block); };
-         default:
-            return [this, ghost_layers](IBlock* block) { stream(block, ghost_layers); };
+      case Type::INNER:
+         return [this](IBlock* block) { streamInner(block); };
+      case Type::OUTER:
+         return [this](IBlock* block) { streamOuter(block); };
+      default:
+         return [this, ghost_layers](IBlock* block) { stream(block, ghost_layers); };
       }
    }
 
@@ -624,14 +642,14 @@ public:
          layers_.push_back(ci);
       }
 
-    
+      
       for( auto & ci: layers_ )
       {
          streamCellInterval(pdfs, pdfs_tmp, ci);
       }
-    
+      
 
-    pdfs->swapDataPointers(pdfs_tmp);
+      pdfs->swapDataPointers(pdfs_tmp);
 
    }
    
@@ -643,27 +661,33 @@ public:
 
    std::function<void (IBlock *)> streamOnlyNoAdvancement(Type type)
    {
+      if (!validInnerOuterSplit_ && type != Type::ALL)
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
       switch (type)
       {
-         case Type::INNER:
-            return [this](IBlock* block) { streamOnlyNoAdvancementInner(block); };
-         case Type::OUTER:
-            return [this](IBlock* block) { streamOnlyNoAdvancementOuter(block); };
-         default:
-            return [this](IBlock* block) { streamOnlyNoAdvancement(block); };
+      case Type::INNER:
+         return [this](IBlock* block) { streamOnlyNoAdvancementInner(block); };
+      case Type::OUTER:
+         return [this](IBlock* block) { streamOnlyNoAdvancementOuter(block); };
+      default:
+         return [this](IBlock* block) { streamOnlyNoAdvancement(block); };
       }
    }
 
    std::function<void (IBlock *)> streamOnlyNoAdvancement(Type type, const cell_idx_t ghost_layers)
    {
+      if (!validInnerOuterSplit_ && type != Type::ALL)
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
       switch (type)
       {
-         case Type::INNER:
-            return [this](IBlock* block) { streamOnlyNoAdvancementInner(block); };
-         case Type::OUTER:
-            return [this](IBlock* block) { streamOnlyNoAdvancementOuter(block); };
-         default:
-            return [this, ghost_layers](IBlock* block) { streamOnlyNoAdvancement(block, ghost_layers); };
+      case Type::INNER:
+         return [this](IBlock* block) { streamOnlyNoAdvancementInner(block); };
+      case Type::OUTER:
+         return [this](IBlock* block) { streamOnlyNoAdvancementOuter(block); };
+      default:
+         return [this, ghost_layers](IBlock* block) { streamOnlyNoAdvancement(block, ghost_layers); };
       }
    }
 
@@ -821,14 +845,14 @@ public:
          layers_.push_back(ci);
       }
 
-    
+      
       for( auto & ci: layers_ )
       {
          streamOnlyNoAdvancementCellInterval(pdfs, pdfs_tmp, ci);
       }
-    
+      
 
-    
+      
    }
    
 
@@ -839,27 +863,33 @@ public:
 
    std::function<void (IBlock *)> initialise(Type type)
    {
+      if (!validInnerOuterSplit_ && type != Type::ALL)
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
       switch (type)
       {
-         case Type::INNER:
-            return [this](IBlock* block) { initialiseInner(block); };
-         case Type::OUTER:
-            return [this](IBlock* block) { initialiseOuter(block); };
-         default:
-            return [this](IBlock* block) { initialise(block); };
+      case Type::INNER:
+         return [this](IBlock* block) { initialiseInner(block); };
+      case Type::OUTER:
+         return [this](IBlock* block) { initialiseOuter(block); };
+      default:
+         return [this](IBlock* block) { initialise(block); };
       }
    }
 
    std::function<void (IBlock *)> initialise(Type type, const cell_idx_t ghost_layers)
    {
+      if (!validInnerOuterSplit_ && type != Type::ALL)
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
       switch (type)
       {
-         case Type::INNER:
-            return [this](IBlock* block) { initialiseInner(block); };
-         case Type::OUTER:
-            return [this](IBlock* block) { initialiseOuter(block); };
-         default:
-            return [this, ghost_layers](IBlock* block) { initialise(block, ghost_layers); };
+      case Type::INNER:
+         return [this](IBlock* block) { initialiseInner(block); };
+      case Type::OUTER:
+         return [this](IBlock* block) { initialiseOuter(block); };
+      default:
+         return [this, ghost_layers](IBlock* block) { initialise(block, ghost_layers); };
       }
    }
 
@@ -870,9 +900,9 @@ public:
       const cell_idx_t ghost_layers = 0;
       
 
+      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
       auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
       auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
-      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
 
       
       
@@ -884,9 +914,9 @@ public:
    {
       
 
+      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
       auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
       auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
-      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
 
       
       
@@ -898,9 +928,9 @@ public:
 
    void initialiseCellInterval(IBlock * block, const CellInterval & ci)
    {
+      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
       auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
       auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
-      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
 
       
       
@@ -910,9 +940,9 @@ public:
 
    void initialiseInner(IBlock * block)
    {
+      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
       auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
       auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
-      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
 
       
       
@@ -926,9 +956,9 @@ public:
    void initialiseOuter(IBlock * block)
    {
 
+      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
       auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
       auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
-      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
 
       
       
@@ -957,14 +987,14 @@ public:
          layers_.push_back(ci);
       }
 
-    
+      
       for( auto & ci: layers_ )
       {
          initialiseCellInterval(density, pdfs, velocity, ci);
       }
-    
+      
 
-    
+      
    }
    
 
@@ -975,27 +1005,33 @@ public:
 
    std::function<void (IBlock *)> calculateMacroscopicParameters(Type type)
    {
+      if (!validInnerOuterSplit_ && type != Type::ALL)
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
       switch (type)
       {
-         case Type::INNER:
-            return [this](IBlock* block) { calculateMacroscopicParametersInner(block); };
-         case Type::OUTER:
-            return [this](IBlock* block) { calculateMacroscopicParametersOuter(block); };
-         default:
-            return [this](IBlock* block) { calculateMacroscopicParameters(block); };
+      case Type::INNER:
+         return [this](IBlock* block) { calculateMacroscopicParametersInner(block); };
+      case Type::OUTER:
+         return [this](IBlock* block) { calculateMacroscopicParametersOuter(block); };
+      default:
+         return [this](IBlock* block) { calculateMacroscopicParameters(block); };
       }
    }
 
    std::function<void (IBlock *)> calculateMacroscopicParameters(Type type, const cell_idx_t ghost_layers)
    {
+      if (!validInnerOuterSplit_ && type != Type::ALL)
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
       switch (type)
       {
-         case Type::INNER:
-            return [this](IBlock* block) { calculateMacroscopicParametersInner(block); };
-         case Type::OUTER:
-            return [this](IBlock* block) { calculateMacroscopicParametersOuter(block); };
-         default:
-            return [this, ghost_layers](IBlock* block) { calculateMacroscopicParameters(block, ghost_layers); };
+      case Type::INNER:
+         return [this](IBlock* block) { calculateMacroscopicParametersInner(block); };
+      case Type::OUTER:
+         return [this](IBlock* block) { calculateMacroscopicParametersOuter(block); };
+      default:
+         return [this, ghost_layers](IBlock* block) { calculateMacroscopicParameters(block, ghost_layers); };
       }
    }
 
@@ -1006,9 +1042,9 @@ public:
       const cell_idx_t ghost_layers = 0;
       
 
+      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
       auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
       auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
-      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
 
       
       
@@ -1020,9 +1056,9 @@ public:
    {
       
 
+      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
       auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
       auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
-      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
 
       
       
@@ -1034,9 +1070,9 @@ public:
 
    void calculateMacroscopicParametersCellInterval(IBlock * block, const CellInterval & ci)
    {
+      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
       auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
       auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
-      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
 
       
       
@@ -1046,9 +1082,9 @@ public:
 
    void calculateMacroscopicParametersInner(IBlock * block)
    {
+      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
       auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
       auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
-      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
 
       
       
@@ -1062,9 +1098,9 @@ public:
    void calculateMacroscopicParametersOuter(IBlock * block)
    {
 
+      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
       auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
       auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
-      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
 
       
       
@@ -1093,32 +1129,33 @@ public:
          layers_.push_back(ci);
       }
 
-    
+      
       for( auto & ci: layers_ )
       {
          calculateMacroscopicParametersCellInterval(density, pdfs, velocity, ci);
       }
-    
+      
 
-    
+      
    }
    
 
    
 
-   private:
-      shared_ptr< StructuredBlockStorage > blocks_;
-      BlockDataID pdfsID;
+ private:
+   shared_ptr< StructuredBlockStorage > blocks_;
+   BlockDataID pdfsID;
     BlockDataID densityID;
     BlockDataID velocityID;
     double omega_;
 
     private: std::set< field::GhostLayerField<double, 27> *, field::SwapableCompare< field::GhostLayerField<double, 27> * > > cache_pdfs_;
 
-      Cell outerWidth_;
-      std::vector<CellInterval> layers_;
+   Cell outerWidth_;
+   std::vector<CellInterval> layers_;
+   bool validInnerOuterSplit_;
 
-      
+   
 };
 
 
diff --git a/src/lbm_generated/sweep_collection/sweep_collection_generation_script.py b/src/lbm_generated/sweep_collection/sweep_collection_generation_script.py
index bdc208608f08bf202d361b5c369d48199c5c5ed4..ad2708618e6e07f278d2e9615aa3c771f19449ff 100644
--- a/src/lbm_generated/sweep_collection/sweep_collection_generation_script.py
+++ b/src/lbm_generated/sweep_collection/sweep_collection_generation_script.py
@@ -25,7 +25,7 @@ with ManualCodeGenerationContext(openmp=False, optimize_for_localhost=False,
         relaxation_rate = sp.symbols("omega")
         streaming_pattern = 'pull'
 
-        pdfs = fields(f"pdfs({stencil.Q}): {data_type}[{stencil.D}D]", layout='fzyx')
+        pdfs, pdfs_tmp = fields(f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {data_type}[3D]", layout='fzyx')
         density_field, velocity_field = fields(f"density(1), velocity({stencil.D}): {data_type}[{stencil.D}D]",
                                                layout='fzyx')
 
@@ -33,13 +33,12 @@ with ManualCodeGenerationContext(openmp=False, optimize_for_localhost=False,
 
         lbm_config = LBMConfig(stencil=stencil, method=method, relaxation_rate=relaxation_rate,
                                streaming_pattern=streaming_pattern)
-        lbm_opt = LBMOptimisation(cse_global=False)
+        lbm_opt = LBMOptimisation(cse_global=False, symbolic_field=pdfs, symbolic_temporary_field=pdfs_tmp)
 
         collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
 
         generate_lbm_sweep_collection(ctx, f'{stencil.name}{method.name}', collision_rule,
-                                      streaming_pattern='pull',
-                                      field_layout='zyxf',
+                                      lbm_config=lbm_config, lbm_optimisation=lbm_opt,
                                       refinement_scaling=None,
                                       macroscopic_fields=macroscopic_fields,
                                       target=target, data_type=data_type,
diff --git a/src/mesh_common/MeshOperations.h b/src/mesh_common/MeshOperations.h
index 159481e6b73b8f8186765c06984868e43593c690..ecaec8ebbb3b7ae91e557cfa052ef6f27e539b5e 100644
--- a/src/mesh_common/MeshOperations.h
+++ b/src/mesh_common/MeshOperations.h
@@ -91,6 +91,19 @@ typename MeshType::Point principalComponent( const MeshType & mesh, InputIterato
 template< typename MeshType >
 typename MeshType::Point principalComponent( const MeshType & mesh, const uint_t iterations = uint_t( 10 ) );
 
+/**
+* \brief Color the faces of a mesh according to the colors of their vertices
+*
+* Iterates over all faces and assigns each face the color shared by all of its vertices.
+* If the vertices of a face differ in color, the default color is assigned instead.
+* The mesh must carry vertex colors (enforced with WALBERLA_CHECK).
+*
+* \tparam MeshType The type of the mesh
+* \param mesh The mesh to color; face colors are requested and written, so it is modified
+* \param defaultColor Color assigned to faces whose vertex colors are not uniform
+ */
+template< typename MeshType >
+void vertexToFaceColor( MeshType & mesh, const typename MeshType::Color& defaultColor);
 
 template< typename MeshType >
 math::GenericAABB< typename MeshType::Scalar > computeAABB( const MeshType & mesh )
@@ -735,6 +748,34 @@ typename MeshType::Point principalComponent( const MeshType & mesh, const uint_t
    return principalComponent( mesh, mesh.faces_begin(), mesh.faces_end(), iterations );
 }
 
+template< typename MeshType >
+void vertexToFaceColor( MeshType & mesh, const typename MeshType::Color& defaultColor)
+{
+   WALBERLA_CHECK(mesh.has_vertex_colors())
+   // face colors are requested and written below, hence the mesh is taken by non-const reference
+   mesh.request_face_colors();
+
+   for (auto faceIt = mesh.faces_begin(); faceIt != mesh.faces_end(); ++faceIt)
+   {
+      auto vertexIt = mesh.fv_iter(*faceIt);
+      WALBERLA_ASSERT(vertexIt.is_valid())
+
+      // the first vertex of the face provides the reference color
+      const typename MeshType::Color vertexColor = mesh.color(*vertexIt);
+      bool useVertexColor = true;
+
+      // compare the remaining vertices; stop at the first deviating color
+      ++vertexIt;
+      while (vertexIt.is_valid() && useVertexColor)
+      {
+         if (vertexColor != mesh.color(*vertexIt)) useVertexColor = false;
+         ++vertexIt;
+      }
+
+      mesh.set_color(*faceIt, useVertexColor ? vertexColor : defaultColor);
+   }
+}
+
 
 
 } // namespace mesh
diff --git a/tests/lbm_generated/Example.cpp b/tests/lbm_generated/Example.cpp
index 2e77ddcb2bfa24c924553afdf71fc5b8081a49ab..fef75d8110d3fdd5c818926415c26908234483c3 100644
--- a/tests/lbm_generated/Example.cpp
+++ b/tests/lbm_generated/Example.cpp
@@ -145,7 +145,7 @@ int main(int argc, char** argv)
    auto domainSetup = walberlaEnv.config()->getOneBlock("DomainSetup");
    auto parameters  = walberlaEnv.config()->getOneBlock("Parameters");
 
-   auto omega           = parameters.getParameter< real_t >("omega", real_c(1.4));
+   real_t omega           = parameters.getParameter< real_t >("omega", real_c(1.4));
    auto timesteps       = parameters.getParameter< uint_t >("timesteps", uint_c(10)) + uint_c(1);
    auto refinementDepth = parameters.getParameter< uint_t >("refinementDepth", uint_c(1));
 
diff --git a/tests/lbm_generated/Example.py b/tests/lbm_generated/Example.py
index c4e3eaa96d596b2745811e30739790748740f884..17d1c1c1b2553935ce87af48d95ef1f8519ae2c1 100644
--- a/tests/lbm_generated/Example.py
+++ b/tests/lbm_generated/Example.py
@@ -25,12 +25,12 @@ with CodeGeneration() as ctx:
     stencil = LBStencil(Stencil.D3Q19)
     pdfs, vel_field = fields(f"pdfs({stencil.Q}), velocity({stencil.D}): {data_type}[{stencil.D}D]",
                              layout='fzyx')
-
+ 
     macroscopic_fields = {'velocity': vel_field}
 
     lbm_config = LBMConfig(stencil=stencil, method=Method.SRT, relaxation_rate=omega,
                            streaming_pattern=streaming_pattern)
-    lbm_opt = LBMOptimisation(cse_global=False, field_layout='fzyx')
+    lbm_opt = LBMOptimisation(cse_global=False, symbolic_field=pdfs, field_layout='fzyx')
 
     method = create_lb_method(lbm_config=lbm_config)
     collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)